diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml deleted file mode 100644 index 8203e24..0000000 --- a/.github/workflows/ci.yml +++ /dev/null @@ -1,44 +0,0 @@ -name: CI - -on: - push: - branches: [master] - pull_request: - branches: [master] - -jobs: - tests: - name: Tests (Python ${{ matrix.python-version }}) - runs-on: ubuntu-latest - services: - hbase: - image: dajobe/hbase - ports: - - 9090:9090 - - strategy: - matrix: - python-version: ['3.10', '3.11', '3.12', '3.13', '3.14'] - - steps: - - uses: actions/checkout@v4 - - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v5 - with: - python-version: ${{ matrix.python-version }} - - - name: Install dependencies - run: pip install pytest pytest-cov -r requirements.txt - - - name: Wait for HBase Thrift server - run: | - echo "Waiting for HBase Thrift server on port 9090..." - timeout 120 bash -c 'until nc -z localhost 9090; do echo "Still waiting..."; sleep 5; done' - echo "HBase Thrift server is ready" - - - name: Run tests - env: - HAPPYBASE_HOST: localhost - HAPPYBASE_PORT: "9090" - run: pytest tests/ -v --tb=short diff --git a/.gitignore b/.gitignore deleted file mode 100644 index 39b23b2..0000000 --- a/.gitignore +++ /dev/null @@ -1,8 +0,0 @@ -*.py[co] -*.egg-info/ -.coverage -.tox/ -build/ -coverage/ -dist/ -doc/build/ diff --git a/LICENSE.rst b/LICENSE.rst deleted file mode 100644 index 8b96f17..0000000 --- a/LICENSE.rst +++ /dev/null @@ -1,216 +0,0 @@ -******* -License -******* - -HappyBase itself is licensed under a `MIT License -`_. HappyBase contains code originating -from HBase sources, licensed under the `Apache License -`_ (version 2.0). Both license texts are -included below. - - -HappyBase License -================= - -(This is the `MIT License `_.) 
- -Copyright © 2012 Wouter Bolsterlee - -Permission is hereby granted, free of charge, to any person obtaining a copy of -this software and associated documentation files (the "Software"), to deal in -the Software without restriction, including without limitation the rights to -use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies -of the Software, and to permit persons to whom the Software is furnished to do -so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. - - - -HBase License -============= - -(This is the `Apache License `_, version 2.0, -January 2004.) - -TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - -1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. 
For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. 
For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - -2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - -3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. 
If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - -4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. 
You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - -5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - -6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - -7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - -8. 
Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - -9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. 
- -END OF TERMS AND CONDITIONS diff --git a/MANIFEST.in b/MANIFEST.in deleted file mode 100644 index 0f348ef..0000000 --- a/MANIFEST.in +++ /dev/null @@ -1,5 +0,0 @@ -include Makefile -include requirements.txt -include happybase/*.thrift -include *.rst -include doc/conf.py doc/*.rst diff --git a/Makefile b/Makefile deleted file mode 100644 index 529bab3..0000000 --- a/Makefile +++ /dev/null @@ -1,19 +0,0 @@ -.PHONY: all doc test clean - -all: doc clean - -doc: - python setup.py build_sphinx - @echo - @echo Generated documentation: "file://"$$(readlink -f doc/build/html/index.html) - @echo - -test: - -find coverage/ -mindepth 1 -delete - pytest $${TESTS} - -clean: - find . -name '*.py[co]' -delete - -dist: test - python setup.py sdist diff --git a/NEWS.rst b/NEWS.rst deleted file mode 100644 index 04302a1..0000000 --- a/NEWS.rst +++ /dev/null @@ -1,237 +0,0 @@ -Version history -=============== - -.. py:currentmodule:: happybase - - -HappyBase 1.3.0 ---------------- - -Release date: 2025-09-26 - -* Switch to pytest - (`pr 238 `_) -* Use ``importlib_resources`` package because ``pkg_resources`` is - deprecated in Python 3.12 - (`pr 261 `_) - - -HappyBase 1.2.0 ---------------- - -Release date: 2019-05-14 - -* Switch from ``thriftpy`` to its successor ``thriftpy2``, - which supports Python 3.7. - (`issue #221 `_, - `pr 222 `_) - - -HappyBase 1.1.0 ---------------- - -Release date: 2017-04-03 - -* Set socket timeout unconditionally on ``TSocket`` - (`#146 `_) - -* Add new ‘0.98’ compatibility mode - (`#155 `_) - -* Add support for reversed scanners - (`#67 `_, - `#155 `_) - - -HappyBase 1.0.0 ---------------- - -Release date: 2016-08-13 - -* First 1.x.y release! - - From now on this library uses a semantic versioning scheme. - HappyBase is a mature library, but always used 0.x version numbers - for no good reason. This has now changed. - -* Finally, Python 3 support. Thanks to all the people who contributed! 
- (`issue #40 `_, - `pr 116 `_, - `pr 108 `_, - `pr 111 `_) - -* Switch to thriftpy as the underlying Thrift library, which is a much - nicer and better maintained library. - -* Enable building universal wheels - (`issue 78 `_) - - -HappyBase 0.9 -------------- - -Release date: 2014-11-24 - -* Fix an issue where scanners would return fewer results than expected due to - HBase not always behaving as its documentation suggests (`issue #72 - `_). - -* Add support for the Thrift compact protocol (``TCompactProtocol``) in - :py:class:`Connection` (`issue #70 - `_). - - -HappyBase 0.8 -------------- - -Release date: 2014-02-25 - -* Add (and default to) '0.96' compatibility mode in :py:class:`Connection`. - -* Add support for retrieving sorted columns, which is possible with the HBase - 0.96 Thrift API. This feature uses a new `sorted_columns` argument to - :py:meth:`Table.scan`. An ``OrderedDict`` implementation is required for this - feature; with Python 2.7 this is available from the standard library, but for - Python 2.6 a separate ``ordereddict`` package has to be installed from PyPI. - (`issue #39 `_) - -* The `batch_size` argument to :py:meth:`Table.scan` is no longer propagated to - `Scan.setBatching()` at the Java side (inside the Thrift server). To influence - the `Scan.setBatching()` (which may split rows into partial rows) a new - `scan_batching` argument to :py:meth:`Table.scan` has been added. See `issue - #54 `_, `issue #56 - `_, and the HBase docs for - `Scan.setBatching()` for more details. - - -HappyBase 0.7 -------------- - -Release date: 2013-11-06 - -* Added a `wal` argument to various data manipulation methods on the - :py:class:`Table` and :py:class:`Batch` classes to determine whether to write - the mutation to the Write-Ahead Log (WAL). (`issue #36 - `_) - -* Pass batch_size to underlying Thrift Scan instance (`issue #38 - `_). - -* Expose server name and port in :py:meth:`Table.regions` (recent HBase versions - only) (`issue #37 `_). 
- -* Regenerated bundled Thrift API modules using a recent upstream Thrift API - definition. This is required to expose newly added API. - - -HappyBase 0.6 -------------- - -Release date: 2013-06-12 - -* Rewrote exception handling in connection pool. Exception handling is now a lot - cleaner and does not introduce cyclic references anymore. (`issue #25 - `_). - -* Regenerated bundled Thrift code using Thrift 0.9.0 with the new-style classes - flag (`issue #27 `_). - - -HappyBase 0.5 -------------- - -Release date: 2013-05-24 - -* Added a thread-safe connection pool (:py:class:`ConnectionPool`) to keep - connections open and share them between threads (`issue #21 - `_). - -* The :py:meth:`Connection.delete_table` method now features an optional - `disable` parameter to make deleting enabled tables easier. - -* The debug log message emitted by :py:meth:`Table.scan` when closing a scanner - now includes both the number of rows returned to the calling code, and also - the number of rows actually fetched from the server. If scanners are not - completely iterated over (e.g. because of a 'break' statement in the for loop - for the scanner), these numbers may differ. If this happens often, and the - differences are big, this may be a hint that the `batch_size` parameter to - :py:meth:`Table.scan()` is not optimal for your application. - -* Increased Thrift dependency to at least 0.8. Older versions are no longer - available from PyPI. HappyBase should not be used with obsoleted Thrift - versions. - -* The :py:class:`Connection` constructor now features an optional `timeout` - parameter to specify the timeout to use for the Thrift socket (`issue #15 - `_) - -* The `timestamp` argument to various methods now also accepts `long` values in - addition to `int` values. This fixes problems with large timestamp values on - 32-bit systems. (`issue #23 - `_). 
- -* In some corner cases exceptions were raised during interpreter shutdown while - closing any remaining open connections. (`issue #18 - `_) - - -HappyBase 0.4 -------------- - -Release date: 2012-07-11 - -* Add an optional `table_prefix_separator` argument to the - :py:class:`Connection` constructor, to specify the prefix used for the - `table_prefix` argument (`issue #3 - `_) -* Add support for framed Thrift transports using a new optional `transport` - argument to :py:class:`Connection` (`issue #6 - `_) -* Add the Apache license conditions in the :doc:`license statement ` - (for the included HBase parts) -* Documentation improvements - - -HappyBase 0.3 -------------- - -Release date: 2012-05-25 - -New features: - -* Improved compatibility with HBase 0.90.x - - * In earlier versions, using :py:meth:`Table.scan` in combination with HBase - 0.90.x often resulted in crashes, caused by incompatibilities in the - underlying Thrift protocol. - * A new `compat` flag to the :py:class:`Connection` constructor has been - added to enable compatibility with HBase 0.90.x. - * Note that the :py:meth:`Table.scan` API has a few limitations when used - with HBase 0.90.x. - -* The `row_prefix` argument to :py:meth:`Table.scan` can now be used together - with `filter` and `timestamp` arguments. - -Other changes: - -* Lower Thrift dependency to 0.6 -* The `setup.py` script no longer installs the tests -* Documentation improvements - - -HappyBase 0.2 -------------- - -Release date: 2012-05-22 - -* Fix package installation, so that ``pip install happybase`` works as expected - (`issue #1 `_) -* Various small documentation improvements - - -HappyBase 0.1 -------------- - -Release date: 2012-05-20 - -* Initial release diff --git a/README.rst b/README.rst deleted file mode 100644 index c29dadc..0000000 --- a/README.rst +++ /dev/null @@ -1,16 +0,0 @@ -HappyBase -========= - -**HappyBase** is a developer-friendly Python_ library to interact with Apache -HBase_. 
- -* `Documentation `_ (Read the Docs) -* `Downloads `_ (PyPI) -* `Source code `_ (Github) - -.. _Python: http://python.org/ -.. _HBase: http://hbase.apache.org/ - -.. If you're reading this from the README.rst file in a source tree, - you can generate the HTML documentation by running "make doc" and browsing - to doc/build/html/index.html to see the result. diff --git a/TODO.rst b/TODO.rst deleted file mode 100644 index 3178e84..0000000 --- a/TODO.rst +++ /dev/null @@ -1,18 +0,0 @@ -.. Note: this list is automatically included in the documentation. - -*********************************** -To-do list and possible future work -*********************************** - -This document lists some ideas that the developers thought of, but have not yet -implemented. The topics described below may be implemented (or not) in the -future, depending on time, demand, and technical possibilities. - -* Improved error handling instead of just propagating the errors from the - Thrift layer. Maybe wrap the errors in a HappyBase.Error? - -* Automatic retries for failed operations (but only those that can be retried) - -* Port HappyBase over to the (still experimental) HBase Thrift2 API when it - becomes mainstream, and expose more of the underlying features nicely in the - HappyBase API. diff --git a/doc/api.rst b/doc/api.rst deleted file mode 100644 index 2b1e3b7..0000000 --- a/doc/api.rst +++ /dev/null @@ -1,59 +0,0 @@ -============= -API reference -============= - -.. py:currentmodule:: happybase - -This chapter contains detailed API documentation for HappyBase. It is suggested -to read the :doc:`user guide ` first to get a general idea about how -HappyBase works. - -The HappyBase API is organised as follows: - -:py:class:`~happybase.Connection`: - The :py:class:`~happybase.Connection` class is the main entry point for - application developers. It connects to the HBase Thrift server and provides - methods for table management. 
- -:py:class:`~happybase.Table`: - The :py:class:`Table` class is the main class for interacting with data in - tables. This class offers methods for data retrieval and data manipulation. - Instances of this class can be obtained using the - :py:meth:`Connection.table()` method. - -:py:class:`~happybase.Batch`: - The :py:class:`Batch` class implements the batch API for data manipulation, - and is available through the :py:meth:`Table.batch()` method. - -:py:class:`~happybase.ConnectionPool`: - The :py:class:`ConnectionPool` class implements a thread-safe connection - pool that allows an application to (re)use multiple connections. - - -Connection -========== - -.. autoclass:: happybase.Connection - - -Table -===== - -.. autoclass:: happybase.Table - - -Batch -===== - -.. autoclass:: happybase.Batch - - -Connection pool -=============== - -.. autoclass:: happybase.ConnectionPool - -.. autoclass:: happybase.NoConnectionsAvailable - - -.. vim: set spell spelllang=en: diff --git a/doc/conf.py b/doc/conf.py deleted file mode 100644 index 4fead40..0000000 --- a/doc/conf.py +++ /dev/null @@ -1,250 +0,0 @@ -# -*- coding: utf-8 -*- -# -# HappyBase documentation build configuration file, created by -# sphinx-quickstart on Tue Mar 20 17:40:16 2012. -# -# This file is execfile()d with the current directory set to its containing dir. -# -# Note that not all possible configuration values are present in this -# autogenerated file. -# -# All configuration values have a default; values that are commented out -# serve to show the default. - -import sys, os - -# If extensions (or modules to document with autodoc) are in another directory, -# add these directories to sys.path here. If the directory is relative to the -# documentation root, use os.path.abspath to make it absolute, like shown here. 
-#sys.path.insert(0, os.path.abspath('.')) - -# -- General configuration ----------------------------------------------------- - -# If your documentation needs a minimal Sphinx version, state it here. -#needs_sphinx = '1.0' - -# Add any Sphinx extension module names here, as strings. They can be extensions -# coming with Sphinx (named 'sphinx.ext.*') or your custom ones. -extensions = ['sphinx.ext.autodoc', 'sphinx.ext.coverage'] - -# Add any paths that contain templates here, relative to this directory. -templates_path = ['_templates'] - -# The suffix of source filenames. -source_suffix = '.rst' - -# The encoding of source files. -#source_encoding = 'utf-8-sig' - -# The master toctree document. -master_doc = 'index' - -# General information about the project. -project = u'HappyBase' -copyright = u'2012' - -# The version info for the project you're documenting, acts as replacement for -# |version| and |release|, also used in various other places throughout the -# built documents. -# -# The short X.Y version. -version_file = os.path.join( - os.path.dirname(__file__), - '../happybase/_version.py') -with open(version_file, 'r') as fp: - exec(fp.read()) -version = __version__ - -# The full version, including alpha/beta/rc tags. -release = __version__ - -# The language for content autogenerated by Sphinx. Refer to documentation -# for a list of supported languages. -#language = None - -# There are two options for replacing |today|: either, you set today to some -# non-false value, then it is used: -#today = '' -# Else, today_fmt is used as the format for a strftime call. -#today_fmt = '%B %d, %Y' - -# List of patterns, relative to source directory, that match files and -# directories to ignore when looking for source files. -exclude_patterns = [] - -# The reST default role (used for this markup: `text`) to use for all documents. -#default_role = None - -# If true, '()' will be appended to :func: etc. cross-reference text. 
-#add_function_parentheses = True - -# If true, the current module name will be prepended to all description -# unit titles (such as .. function::). -#add_module_names = True - -# If true, sectionauthor and moduleauthor directives will be shown in the -# output. They are ignored by default. -#show_authors = False - -# The name of the Pygments (syntax highlighting) style to use. -pygments_style = 'sphinx' - -# A list of ignored prefixes for module index sorting. -#modindex_common_prefix = [] - -autodoc_default_flags = ['members', 'undoc-members'] -autodoc_member_order = 'bysource' - -# -- Options for HTML output --------------------------------------------------- - -# The theme to use for HTML and HTML Help pages. See the documentation for -# a list of builtin themes. -html_theme = 'default' - -# Theme options are theme-specific and customize the look and feel of a theme -# further. For a list of options available for each theme, see the -# documentation. -#html_theme_options = {} - -# Add any paths that contain custom themes here, relative to this directory. -#html_theme_path = [] - -# The name for this set of Sphinx documents. If None, it defaults to -# " v documentation". -#html_title = None - -# A shorter title for the navigation bar. Default is the same as html_title. -#html_short_title = None - -# The name of an image file (relative to this directory) to place at the top -# of the sidebar. -#html_logo = None - -# The name of an image file (within the static path) to use as favicon of the -# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 -# pixels large. -#html_favicon = None - -# Add any paths that contain custom static files (such as style sheets) here, -# relative to this directory. They are copied after the builtin static files, -# so a file named "default.css" will overwrite the builtin "default.css". 
-html_static_path = ['_static'] - -# If not '', a 'Last updated on:' timestamp is inserted at every page bottom, -# using the given strftime format. -#html_last_updated_fmt = '%b %d, %Y' - -# If true, SmartyPants will be used to convert quotes and dashes to -# typographically correct entities. -#html_use_smartypants = True - -# Custom sidebar templates, maps document names to template names. -#html_sidebars = {} - -# Additional templates that should be rendered to pages, maps page names to -# template names. -#html_additional_pages = {} - -# If false, no module index is generated. -#html_domain_indices = True - -# If false, no index is generated. -#html_use_index = True - -# If true, the index is split into individual pages for each letter. -#html_split_index = False - -# If true, links to the reST sources are added to the pages. -#html_show_sourcelink = True - -# If true, "Created using Sphinx" is shown in the HTML footer. Default is True. -#html_show_sphinx = True - -# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. -#html_show_copyright = True - -# If true, an OpenSearch description file will be output, and all pages will -# contain a tag referring to it. The value of this option must be the -# base URL from which the finished HTML is served. -#html_use_opensearch = '' - -# This is the file name suffix for HTML files (e.g. ".xhtml"). -#html_file_suffix = None - -# Output file base name for HTML help builder. -#htmlhelp_basename = 'HappyBasedoc' - - -# -- Options for LaTeX output -------------------------------------------------- - -#latex_elements = { -# The paper size ('letterpaper' or 'a4paper'). -#'papersize': 'letterpaper', - -# The font size ('10pt', '11pt' or '12pt'). -#'pointsize': '10pt', - -# Additional stuff for the LaTeX preamble. -#'preamble': '', -#} - -# Grouping the document tree into LaTeX files. List of tuples -# (source start file, target name, title, author, documentclass [howto/manual]). 
-#latex_documents = [ -# ('index', 'HappyBase.tex', u'HappyBase Documentation', -# u' ', 'manual'), -#] - -# The name of an image file (relative to this directory) to place at the top of -# the title page. -#latex_logo = None - -# For "manual" documents, if this is true, then toplevel headings are parts, -# not chapters. -#latex_use_parts = False - -# If true, show page references after internal links. -#latex_show_pagerefs = False - -# If true, show URL addresses after external links. -#latex_show_urls = False - -# Documents to append as an appendix to all manuals. -#latex_appendices = [] - -# If false, no module index is generated. -#latex_domain_indices = True - - -# -- Options for manual page output -------------------------------------------- - -# One entry per manual page. List of tuples -# (source start file, name, description, authors, manual section). -#man_pages = [ -# ('index', 'happybase', u'HappyBase Documentation', -# [u' '], 1) -#] - -# If true, show URL addresses after external links. -#man_show_urls = False - - -# -- Options for Texinfo output ------------------------------------------------ - -# Grouping the document tree into Texinfo files. List of tuples -# (source start file, target name, title, author, -# dir menu entry, description, category) -#texinfo_documents = [ -# ('index', 'HappyBase', u'HappyBase Documentation', -# u' ', 'HappyBase', 'One line description of project.', -# 'Miscellaneous'), -#] - -# Documents to append as an appendix to all manuals. -#texinfo_appendices = [] - -# If false, no module index is generated. -#texinfo_domain_indices = True - -# How to display URL addresses: 'footnote', 'no', or 'inline'. -#texinfo_show_urls = 'footnote' diff --git a/doc/development.rst b/doc/development.rst deleted file mode 100644 index c835ea0..0000000 --- a/doc/development.rst +++ /dev/null @@ -1,61 +0,0 @@ -*********** -Development -*********** - -.. 
highlight:: sh - -Getting the source ------------------- - -The HappyBase source code repository is hosted on GitHub: - - https://github.com/wbolster/happybase - -To grab a copy, use this:: - - $ git clone https://github.com/wbolster/happybase.git - - - -Setting up a development environment ------------------------------------- - -Setting up a development environment from a Git branch is easy:: - - $ cd /path/to/happybase/ - $ mkvirtualenv happybase - (happybase)$ pip install -r test-requirements.txt - (happybase)$ pip install -e . - - -Running the tests ------------------ - -The tests use the `pytest` test framework. To execute the tests, run:: - - (happybase)$ make test - -Test outputs are shown on the console. A test code coverage report is saved in -`coverage/index.html`. - -If the Thrift server is not running on localhost, you can specify these -environment variables (both are optional) before running the tests:: - - (happybase)$ export HAPPYBASE_HOST=host.example.org - (happybase)$ export HAPPYBASE_PORT=9091 - -To test the HBase 0.90 compatibility mode, use this:: - - (happybase)$ export HAPPYBASE_COMPAT=0.90 - -To test the framed Thrift transport mode, use this:: - - (happybase)$ export HAPPYBASE_TRANSPORT=framed - -Contributing ------------- - -Feel free to report any issues on GitHub. Patches and merge requests are also -most welcome. - -.. vim: set spell spelllang=en: diff --git a/doc/faq.rst b/doc/faq.rst deleted file mode 100644 index 025b4bd..0000000 --- a/doc/faq.rst +++ /dev/null @@ -1,63 +0,0 @@ -========================== -Frequently asked questions -========================== - - -I love HappyBase! Can I donate? -=============================== - -Thanks, I'm glad to hear that you appreciate my work! If you feel like it, please -make a small donation_ to sponsor my (spare time!) work on HappyBase. Small -gestures are really motivating for me and help me keep this project going! - -.. 
_donation: https://www.paypal.com/cgi-bin/webscr?cmd=_s-xclick&hosted_button_id=ZJ9U8DNN6KZ9Q - - -Why not use the Thrift API directly? -==================================== - -While the HBase Thrift API can be used directly from Python using (automatically -generated) HBase Thrift service classes, application code doing so is very -verbose, cumbersome to write, and hence error-prone. The reason for this is that -the HBase Thrift API is a flat, language-agnostic interface API closely tied to -the RPC going over the wire-level protocol. In practice, this means that -applications using Thrift directly need to deal with many imports, sockets, -transports, protocols, clients, Thrift types and mutation objects. For instance, -look at the code required to connect to HBase and store two values:: - - from thrift import Thrift - from thrift.transport import TSocket, TTransport - from thrift.protocol import TBinaryProtocol - - from hbase import ttypes - from hbase.Hbase import Client, Mutation - - sock = TSocket.TSocket('hostname', 9090) - transport = TTransport.TBufferedTransport(sock) - protocol = TBinaryProtocol.TBinaryProtocol(transport) - client = Client(protocol) - transport.open() - - mutations = [Mutation(column='family:qual1', value='value1'), - Mutation(column='family:qual2', value='value2')] - client.mutateRow('table-name', 'row-key', mutations) - -:pep:`20` taught us that simple is better than complex, and as you can see, -Thrift is certainly complex. HappyBase hides all the Thrift cruft below a -friendly API. The resulting application code will be cleaner, more productive -to write, and more maintainable. 
With HappyBase, the example above can be -simplified to this:: - - import happybase - - connection = happybase.Connection('hostname') - table = connection.table('table-name') - table.put('row-key', {'family:qual1': 'value1', - 'family:qual2': 'value2'}) - -If you're not convinced and still think the Thrift API is not that bad, please -try to accomplish some other common tasks, e.g. retrieving rows and scanning -over a part of a table, and compare that to the HappyBase equivalents. If -you're still not convinced by then, we're sorry to inform you that HappyBase is -not the project for you, and we wish you the best of luck maintaining your code ‒ or -is it just Thrift boilerplate? diff --git a/doc/index.rst b/doc/index.rst deleted file mode 100644 index 53e2eef..0000000 --- a/doc/index.rst +++ /dev/null @@ -1,97 +0,0 @@ -========= -HappyBase -========= - -.. py:currentmodule:: happybase - -**HappyBase** is a developer-friendly Python__ library to interact with `Apache -HBase`__. HappyBase is designed for use in standard HBase setups, and offers -application developers a Pythonic API to interact with HBase. Below the surface, -HappyBase uses the `Python ThriftPy2 library`__ to connect to HBase using its -Thrift__ gateway, which is included in the standard HBase 0.9x releases. - -__ http://python.org/ -__ http://hbase.apache.org/ -__ http://pypi.python.org/pypi/thriftpy2 -__ http://thrift.apache.org/ - - -.. note:: - - **Do you enjoy HappyBase?** Great! You should know that I don't use HappyBase - myself anymore, but still maintain it because it's quite popular. Please - consider making a small donation__ to let me know you appreciate my work. - Thanks! - - __ https://www.paypal.com/cgi-bin/webscr?cmd=_s-xclick&hosted_button_id=ZJ9U8DNN6KZ9Q - - -Example -======= - -The example below illustrates basic usage of the library. The :doc:`user guide -` contains many more examples. 
- -:: - - import happybase - - connection = happybase.Connection('hostname') - table = connection.table('table-name') - - table.put(b'row-key', {b'family:qual1': b'value1', - b'family:qual2': b'value2'}) - - row = table.row(b'row-key') - print(row[b'family:qual1']) # prints 'value1' - - for key, data in table.rows([b'row-key-1', b'row-key-2']): - print(key, data) # prints row key and data for each row - - for key, data in table.scan(row_prefix=b'row'): - print(key, data) # prints 'value1' and 'value2' - - row = table.delete(b'row-key') - - -Core documentation -================== - -.. toctree:: - :maxdepth: 2 - - installation - user - api - - -Additional documentation -======================== - -.. toctree:: - :maxdepth: 1 - - news - development - todo - faq - license - - -External links -============== - -* `Online documentation `_ (Read the Docs) -* `Downloads `_ (PyPI) -* `Source code `_ (Github) - - -Indices and tables -================== - -* :ref:`genindex` -* :ref:`modindex` -* :ref:`search` - - -.. vim: set spell spelllang=en: diff --git a/doc/installation.rst b/doc/installation.rst deleted file mode 100644 index e575488..0000000 --- a/doc/installation.rst +++ /dev/null @@ -1,62 +0,0 @@ -================== -Installation guide -================== - -.. highlight:: sh - -This guide describes how to install HappyBase. - -.. contents:: On this page - :local: - - -Setting up a virtual environment -================================ - -The recommended way to install HappyBase and Thrift is to use a virtual -environment created by `virtualenv`. Setup and activate a new virtual -environment like this:: - - $ virtualenv envname - $ source envname/bin/activate - -If you use the `virtualenvwrapper` scripts, type this instead:: - - $ mkvirtualenv envname - - -Installing the HappyBase package -================================ - -The next step is to install HappyBase. The easiest way is to use `pip` to fetch -the package from the `Python Package Index `_ (PyPI). 
-This will also install the Thrift package for Python. - -:: - - (envname) $ pip install happybase - -.. note:: - - Generating and installing the HBase Thrift Python modules (using ``thrift - --gen py`` on the ``.thrift`` file) is not necessary, since HappyBase - bundles pregenerated versions of those modules. - - -Testing the installation -======================== - -Verify that the packages are installed correctly:: - - (envname) $ python -c 'import happybase' - -If you don't see any errors, the installation was successful. Congratulations! - - -.. rubric:: Next steps - -Now that you successfully installed HappyBase on your machine, continue with -the :doc:`user guide ` to learn how to use it. - - -.. vim: set spell spelllang=en: diff --git a/doc/license.rst b/doc/license.rst deleted file mode 100644 index 68c5792..0000000 --- a/doc/license.rst +++ /dev/null @@ -1 +0,0 @@ -.. include:: ../LICENSE.rst diff --git a/doc/news.rst b/doc/news.rst deleted file mode 100644 index 291074a..0000000 --- a/doc/news.rst +++ /dev/null @@ -1 +0,0 @@ -.. include:: ../NEWS.rst diff --git a/doc/todo.rst b/doc/todo.rst deleted file mode 100644 index 122b033..0000000 --- a/doc/todo.rst +++ /dev/null @@ -1 +0,0 @@ -.. include:: ../TODO.rst diff --git a/doc/user.rst b/doc/user.rst deleted file mode 100644 index 7e397e4..0000000 --- a/doc/user.rst +++ /dev/null @@ -1,573 +0,0 @@ -========== -User guide -========== - -.. py:currentmodule:: happybase - -This user guide explores the HappyBase API and should provide you with enough -information to get you started. Note that this user guide is intended as an -introduction to HappyBase, not to HBase in general. Readers should already have -a basic understanding of HBase and its data model. - -While the user guide does cover most features, it is not a complete reference -guide. More information about the HappyBase API is available from the :doc:`API -documentation `. - -.. 
contents:: On this page - :local: - - -Establishing a connection -========================= - -We'll get started by connecting to HBase. Just create a new -:py:class:`Connection` instance:: - - import happybase - - connection = happybase.Connection('somehost') - -In some setups, the :py:class:`Connection` class needs some additional -information about the HBase version it will be connecting to, and which Thrift -transport to use. If you're still using HBase 0.90.x, you need to set the -`compat` argument to make sure HappyBase speaks the correct wire protocol. -Additionally, if you're using HBase 0.94 with a non-standard Thrift transport -mode, make sure to supply the right `transport` argument. See the API -documentation for the :py:class:`Connection` class for more information about -these arguments and their supported values. - -When a :py:class:`Connection` is created, it automatically opens a socket -connection to the HBase Thrift server. This behaviour can be disabled by -setting the `autoconnect` argument to `False`, and opening the connection -manually using :py:meth:`Connection.open`:: - - connection = happybase.Connection('somehost', autoconnect=False) - - # before first use: - connection.open() - -The :py:class:`Connection` class provides the main entry point to interact with -HBase. For instance, to list the available tables, use -:py:meth:`Connection.tables`:: - - print(connection.tables()) - -Most other methods on the :py:class:`Connection` class are intended for system -management tasks like creating, dropping, enabling and disabling tables. The -:doc:`API documentation ` for the :py:class:`Connection` class contains -more information. This user guide does not cover those since it's more likely -you are already using the HBase shell for these system management tasks. - -.. note:: - - HappyBase also features a connection pool, which is covered later in this - guide. 
- - -Working with tables -=================== - -The :py:class:`Table` class provides the main API to retrieve and manipulate -data in HBase. In the example above, we already asked for the available tables -using the :py:meth:`Connection.tables` method. If there weren't any tables yet, -you can create a new one using :py:meth:`Connection.create_table`:: - - connection.create_table( - 'mytable', - {'cf1': dict(max_versions=10), - 'cf2': dict(max_versions=1, block_cache_enabled=False), - 'cf3': dict(), # use defaults - } - ) - -.. note:: - - The HBase shell is often a better alternative for many HBase administration - tasks, since the shell is more powerful compared to the limited Thrift API - that HappyBase uses. - -The next step is to obtain a :py:class:`.Table` instance to work with. Simply -call :py:meth:`Connection.table`, passing it the table name:: - - table = connection.table('mytable') - -Obtaining a :py:class:`Table` instance does *not* result in a round-trip to the -Thrift server, which means application code may ask the :py:class:`Connection` -instance for a new :py:class:`Table` whenever it needs one, without negative -performance consequences. A side effect is that no check is done to ensure that -the table exists, since that would involve a round-trip. Expect errors if you -try to interact with non-existing tables later in your code. For this guide, we -assume the table exists. - -.. note:: - - The ‘heavy’ `HTable` HBase class from the Java HBase API, which performs the - real communication with the region servers, is at the other side of the - Thrift connection. There is no direct mapping between :py:class:`Table` - instances on the Python side and `HTable` instances on the server side. - -Using table ‘namespaces’ ------------------------- - -If a single HBase instance is shared by multiple applications, table names used -by different applications may collide. 
A simple solution to this problem is to -add a ‘namespace’ prefix to the names of all tables ‘owned’ by a specific -application, e.g. for a project ``myproject`` all tables have names like -``myproject_XYZ``. - -Instead of adding this application-specific prefix each time a table name is -passed to HappyBase, the `table_prefix` argument to :py:class:`Connection` can -take care of this. HappyBase will prepend that prefix (and an underscore) to -each table name handled by that :py:class:`Connection` instance. For example:: - - connection = happybase.Connection('somehost', table_prefix='myproject') - -At this point, :py:meth:`Connection.tables` no longer includes tables in other -‘namespaces’. HappyBase will only return tables with a ``myproject_`` prefix, -and will also remove the prefix transparently when returning results, e.g.:: - - print(connection.tables()) # Table "myproject_XYZ" in HBase will be - # returned as simply "XYZ" - -This also applies to other methods that take table names, such as -:py:meth:`Connection.table`:: - - table = connection.table('XYZ') # Operates on myproject_XYZ in HBase - -The end result is that the table prefix is specified only once in your code, -namely in the call to the :py:class:`Connection` constructor, and that only a -single change is necessary in case it needs changing. - - -Retrieving data -=============== - -The HBase data model is a multidimensional sparse map. A table in HBase -contains column families with column qualifiers containing a value and a -timestamp. In most of the HappyBase API, column family and qualifier names are -specified as a single string, e.g. ``cf1:col1``, and not as two separate -arguments. While column families and qualifiers are different concepts in the -HBase data model, they are almost always used together when interacting with -data, so treating them as a single string makes the API a lot simpler. 
- -Retrieving rows ---------------- - -The :py:class:`Table` class offers various methods to retrieve data from a -table in HBase. The most basic one is :py:meth:`Table.row`, which retrieves a -single row from the table, and returns it as a dictionary mapping columns to -values:: - - row = table.row(b'row-key') - print(row[b'cf1:col1']) # prints the value of cf1:col1 - -The :py:meth:`Table.rows` method works just like :py:meth:`Table.row`, but -takes multiple row keys and returns those as `(key, data)` tuples:: - - rows = table.rows([b'row-key-1', b'row-key-2']) - for key, data in rows: - print(key, data) - -If you want the results that :py:meth:`Table.rows` returns as a dictionary or -ordered dictionary, you will have to do this yourself. This is really easy -though, since the return value can be passed directly to the dictionary -constructor. For a normal dictionary, order is lost:: - - rows_as_dict = dict(table.rows([b'row-key-1', b'row-key-2'])) - -…whereas for a :py:class:`OrderedDict`, order is preserved:: - - from collections import OrderedDict - rows_as_ordered_dict = OrderedDict(table.rows([b'row-key-1', b'row-key-2'])) - - -Making more fine-grained selections ------------------------------------ - -HBase's data model allows for more fine-grained selections of the data to -retrieve. If you know beforehand which columns are needed, performance can be -improved by specifying those columns explicitly to :py:meth:`Table.row` and -:py:meth:`Table.rows`. The `columns` argument takes a list (or tuple) of column -names:: - - row = table.row(b'row-key', columns=[b'cf1:col1', b'cf1:col2']) - print(row[b'cf1:col1']) - print(row[b'cf1:col2']) - -Instead of providing both a column family and a column qualifier, items in the -`columns` argument may also be just a column family, which means that all -columns from that column family will be retrieved. 
For example, to get all -columns and values in the column family `cf1`, use this:: - - row = table.row(b'row-key', columns=[b'cf1']) - -In HBase, each cell has a timestamp attached to it. In case you don't want to -work with the latest version of data stored in HBase, the methods that retrieve -data from the database, e.g. :py:meth:`Table.row`, all accept a `timestamp` -argument that specifies that the results should be restricted to values with a -timestamp up to the specified timestamp:: - - row = table.row(b'row-key', timestamp=123456789) - -By default, HappyBase does not include timestamps in the results it returns. If -your application needs access to the timestamps, simply set the -`include_timestamp` argument to ``True``. Now, each cell in the result will be -returned as a `(value, timestamp)` tuple instead of just a value:: - - row = table.row(b'row-key', columns=[b'cf1:col1'], include_timestamp=True) - value, timestamp = row[b'cf1:col1'] - -HBase supports storing multiple versions of the same cell. This can be -configured for each column family. To retrieve all versions of a column for a -given row, :py:meth:`Table.cells` can be used. This method returns an ordered -list of cells, with the most recent version coming first. The `versions` -argument specifies the maximum number of versions to return. Just like the -methods that retrieve rows, the `include_timestamp` argument determines whether -timestamps are included in the result. Example:: - - values = table.cells(b'row-key', b'cf1:col1', versions=2) - for value in values: - print("Cell data: {}".format(value)) - - cells = table.cells(b'row-key', b'cf1:col1', versions=3, include_timestamp=True) - for value, timestamp in cells: - print("Cell data at {}: {}".format(timestamp, value)) - -Note that the result may contain fewer cells than requested. The cell may just -have fewer versions, or you may have requested more versions than HBase keeps -for the column family. 
- -Scanning over rows in a table ------------------------------ - -In addition to retrieving data for known row keys, rows in HBase can be -efficiently iterated over using a table scanner, created using -:py:meth:`Table.scan`. A basic scanner that iterates over all rows in the table -looks like this:: - - for key, data in table.scan(): - print(key, data) - -Doing full table scans like in the example above is prohibitively expensive in -practice. Scans can be restricted in several ways to make more selective range -queries. One way is to specify start or stop keys, or both. To iterate over all -rows from row `aaa` to the end of the table:: - - for key, data in table.scan(row_start=b'aaa'): - print(key, data) - -To iterate over all rows from the start of the table up to row `xyz`, use this:: - - for key, data in table.scan(row_stop=b'xyz'): - print(key, data) - -To iterate over all rows between row `aaa` (included) and `xyz` (not included), -supply both:: - - for key, data in table.scan(row_start=b'aaa', row_stop=b'xyz'): - print(key, data) - -An alternative is to use a key prefix. For example, to iterate over all rows -starting with `abc`:: - - for key, data in table.scan(row_prefix=b'abc'): - print(key, data) - -The scanner examples above only limit the results by row key using the -`row_start`, `row_stop`, and `row_prefix` arguments, but scanners can also -limit results to certain columns, column families, and timestamps, just like -:py:meth:`Table.row` and :py:meth:`Table.rows`. For advanced users, a filter -string can be passed as the `filter` argument. Additionally, the optional -`limit` argument defines how much data is at most retrieved, and the -`batch_size` argument specifies how big the transferred chunks should be. The -:py:meth:`Table.scan` API documentation provides more information on the -supported scanner options. 
- - -Manipulating data -================= - -HBase does not have any notion of *data types*; all row keys, column -names and column values are simply treated as raw byte strings. - -By design, HappyBase does *not* do any automatic string conversion. -This means that data must be converted to byte strings in your -application before you pass it to HappyBase, for instance by calling -``s.encode('utf-8')`` on text strings (which use Unicode), or by -employing more advanced string serialisation techniques like -``struct.pack()``. Look for HBase modelling techniques for more -details about this. Note that the underlying Thrift library used by -HappyBase does some automatic encoding of text strings into bytes, but -relying on this "feature" is strongly discouraged, since returned data -will not be decoded automatically, resulting in asymmetric and hence -confusing behaviour. Having explicit encode and decode steps in your -application code is the correct way. - -In HBase, all mutations either store data or mark data for deletion; there is -no such thing as an in-place `update` or `delete`. HappyBase provides methods -to do single inserts or deletes, and a batch API to perform multiple mutations -in one go. - -Storing data ------------- - -To store a single cell of data in our table, we can use :py:meth:`Table.put`, -which takes the row key, and the data to store. The data should be a dictionary -mapping the column name to a value:: - - table.put(b'row-key', {b'cf:col1': b'value1', - b'cf:col2': b'value2'}) - -Use the `timestamp` argument if you want to provide timestamps explicitly:: - - table.put(b'row-key', {b'cf:col1': b'value1'}, timestamp=123456789) - -If omitted, HBase defaults to the current system time. - -Deleting data -------------- - -The :py:meth:`Table.delete` method deletes data from a table. 
To delete a -complete row, just specify the row key:: - - table.delete(b'row-key') - -To delete one or more columns instead of a complete row, also specify the -`columns` argument:: - - table.delete(b'row-key', columns=[b'cf1:col1', b'cf1:col2']) - -The optional `timestamp` argument restricts the delete operation to data up to -the specified timestamp. - -Performing batch mutations --------------------------- - -The :py:meth:`Table.put` and :py:meth:`Table.delete` methods both issue a -command to the HBase Thrift server immediately. This means that using these -methods is not very efficient when storing or deleting multiple values. It is -much more efficient to aggregate a bunch of commands and send them to the -server in one go. This is exactly what the :py:class:`Batch` class, created -using :py:meth:`Table.batch`, does. A :py:class:`Batch` instance has put and -delete methods, just like the :py:class:`Table` class, but the changes are sent -to the server in a single round-trip using :py:meth:`Batch.send`:: - - b = table.batch() - b.put(b'row-key-1', {b'cf:col1': b'value1', b'cf:col2': b'value2'}) - b.put(b'row-key-2', {b'cf:col2': b'value2', b'cf:col3': b'value3'}) - b.put(b'row-key-3', {b'cf:col3': b'value3', b'cf:col4': b'value4'}) - b.delete(b'row-key-4') - b.send() - -.. note:: - - Storing and deleting data for the same row key in a single batch leads to - unpredictable results, so don't do that. - -While the methods on the :py:class:`Batch` instance resemble the -:py:meth:`~Table.put` and :py:meth:`~Table.delete` methods, they do not take a -`timestamp` argument for each mutation. Instead, you can specify a single -`timestamp` argument for the complete batch:: - - b = table.batch(timestamp=123456789) - b.put(...) - b.delete(...) - b.send() - -:py:class:`Batch` instances can be used as *context managers*, which are most -useful in combination with Python's ``with`` construct. 
The example above can -be simplified to read:: - - with table.batch() as b: - b.put(b'row-key-1', {b'cf:col1': b'value1', b'cf:col2': b'value2'}) - b.put(b'row-key-2', {b'cf:col2': b'value2', b'cf:col3': b'value3'}) - b.put(b'row-key-3', {b'cf:col3': b'value3', b'cf:col4': b'value4'}) - b.delete(b'row-key-4') - -As you can see, there is no call to :py:meth:`Batch.send` anymore. The batch is -automatically applied when the ``with`` code block terminates, even in case of -errors somewhere in the ``with`` block, so it behaves basically the same as a -``try/finally`` clause. However, some applications require transactional -behaviour, sending the batch only if no exception occurred. Without a context -manager this would look something like this:: - - b = table.batch() - try: - b.put(b'row-key-1', {b'cf:col1': b'value1', b'cf:col2': b'value2'}) - b.put(b'row-key-2', {b'cf:col2': b'value2', b'cf:col3': b'value3'}) - b.put(b'row-key-3', {b'cf:col3': b'value3', b'cf:col4': b'value4'}) - b.delete(b'row-key-4') - raise ValueError("Something went wrong!") - except ValueError as e: - # error handling goes here; nothing will be sent to HBase - pass - else: - # no exceptions; send data - b.send() - -Obtaining the same behaviour is easier using a ``with`` block. 
The -`transaction` argument to :py:meth:`Table.batch` is all you need:: - - try: - with table.batch(transaction=True) as b: - b.put(b'row-key-1', {b'cf:col1': b'value1', b'cf:col2': b'value2'}) - b.put(b'row-key-2', {b'cf:col2': b'value2', b'cf:col3': b'value3'}) - b.put(b'row-key-3', {b'cf:col3': b'value3', b'cf:col4': b'value4'}) - b.delete(b'row-key-4') - raise ValueError("Something went wrong!") - except ValueError: - # error handling goes here; nothing is sent to HBase - pass - - # when no error occurred, the transaction succeeded - -As you may have imagined already, a :py:class:`Batch` keeps all mutations in -memory until the batch is sent, either by calling :py:meth:`Batch.send()` -explicitly, or when the ``with`` block ends. This doesn't work for applications -that need to store huge amounts of data, since it may result in batches that -are too big to send in one round-trip, or in batches that use too much memory. -For these cases, the `batch_size` argument can be specified. The `batch_size` -acts as a threshold: a :py:class:`Batch` instance automatically sends all -pending mutations when there are more than `batch_size` pending operations. For -example, this will result in three round-trips to the server (two batches with -1000 cells, and one with the remaining 400):: - - with table.batch(batch_size=1000) as b: - for i in range(1200): - # this put() will result in two mutations (two cells) - b.put(b'row-%04d' % i, { - b'cf1:col1': b'v1', - b'cf1:col2': b'v2', - }) - -The appropriate `batch_size` is very application-specific since it depends on -the data size, so just experiment to see how different sizes work for your -specific use case. - -Using atomic counters ---------------------- - -The :py:meth:`Table.counter_inc` and :py:meth:`Table.counter_dec` methods allow -for atomic incrementing and decrementing of 8 byte wide values, which are -interpreted as big-endian 64-bit signed integers by HBase. 
Counters are -automatically initialised to 0 upon first use. When incrementing or -decrementing a counter, the value after modification is returned. Example:: - - print(table.counter_inc(b'row-key', b'cf1:counter')) # prints 1 - print(table.counter_inc(b'row-key', b'cf1:counter')) # prints 2 - print(table.counter_inc(b'row-key', b'cf1:counter')) # prints 3 - - print(table.counter_dec(b'row-key', b'cf1:counter')) # prints 2 - -The optional `value` argument specifies how much to increment or decrement by:: - - print(table.counter_inc(b'row-key', b'cf1:counter', value=3)) # prints 5 - -While counters are typically used with the increment and decrement functions -shown above, the :py:meth:`Table.counter_get` and :py:meth:`Table.counter_set` -methods can be used to retrieve or set a counter value directly:: - - print(table.counter_get(b'row-key', b'cf1:counter')) # prints 5 - - table.counter_set(b'row-key', b'cf1:counter', 12) - -.. note:: - - An application should *never* :py:meth:`~Table.counter_get` the current - value, modify it in code and then :py:meth:`~Table.counter_set` the modified - value; use the atomic :py:meth:`~Table.counter_inc` and - :py:meth:`~Table.counter_dec` instead! - - -Using the connection pool -========================= - -HappyBase comes with a thread-safe connection pool that allows multiple threads -to share and reuse open connections. This is most useful in multi-threaded -server applications such as web applications served using Apache's `mod_wsgi`. -When a thread asks the pool for a connection (using -:py:meth:`ConnectionPool.connection`), it will be granted a lease, during which -the thread has exclusive access to the connection. After the thread is done -using the connection, it returns the connection to the pool so that it becomes -available for other threads. - -Instantiating the pool ----------------------- - -The pool is provided by the :py:class:`ConnectionPool` class. 
The `size` -argument to the constructor specifies the number of connections in the pool. -Additional arguments are passed on to the :py:class:`Connection` constructor:: - - pool = happybase.ConnectionPool(size=3, host='...', table_prefix='myproject') - -Upon instantiation, the connection pool will establish a connection immediately, -so that simple problems like wrong host names are detected immediately. For the -remaining connections, the pool acts lazy: new connections will be opened only -when needed. - -Obtaining connections ---------------------- - -Connections can only be obtained using Python's context manager protocol, i.e. -using a code block inside a ``with`` statement. This ensures that connections -are actually returned to the pool after use. Example:: - - pool = happybase.ConnectionPool(size=3, host='...') - - with pool.connection() as connection: - print(connection.tables()) - -.. warning:: - - Never use the ``connection`` instance after the ``with`` block has ended. - Even though the variable is still in scope, the connection may have been - assigned to another thread in the mean time. - -Connections should be returned to the pool as quickly as possible, so that other -threads can use them. This means that the amount of code included inside the -``with`` block should be kept to an absolute minimum. In practice, an -application should only load data inside the ``with`` block, and process the -data outside the ``with`` block:: - - with pool.connection() as connection: - table = connection.table('table-name') - row = table.row(b'row-key') - - process_data(row) - -An application thread can only hold one connection at a time. When a thread -holds a connection and asks for a connection for a second time (e.g. because a -called function also requests a connection from the pool), the same connection -instance it already holds is returned, so this does not require any coordination -from the application. 
This means that in the following example, both connection -requests to the pool will return the exact same connection:: - - pool = happybase.ConnectionPool(size=3, host='...') - - def do_something_else(): - with pool.connection() as connection: - pass # use the connection here - - with pool.connection() as connection: - # use the connection here, e.g. - print(connection.tables()) - - # call another function that uses a connection - do_something_else() - -Handling broken connections ---------------------------- - -The pool tries to detect broken connections and will replace those with fresh -ones when the connection is returned to the pool. However, the connection pool -does not capture raised exceptions, nor does it automatically retry failed -operations. This means that the application still has to handle connection -errors. - - -.. rubric:: Next steps - -The next step is to try it out for yourself! The :doc:`API documentation ` -can be used as a reference. - -.. vim: set spell spelllang=en: diff --git a/happybase-0.7.tar.gz.delta b/happybase-0.7.tar.gz.delta new file mode 100644 index 0000000..9d1797c Binary files /dev/null and b/happybase-0.7.tar.gz.delta differ diff --git a/happybase-0.7.tar.gz.id b/happybase-0.7.tar.gz.id new file mode 100644 index 0000000..3208383 --- /dev/null +++ b/happybase-0.7.tar.gz.id @@ -0,0 +1 @@ +e4e8b07390b882aa7dc171329761c36d8d5120ef diff --git a/happybase-0.8.tar.gz.delta b/happybase-0.8.tar.gz.delta new file mode 100644 index 0000000..812bded Binary files /dev/null and b/happybase-0.8.tar.gz.delta differ diff --git a/happybase-0.8.tar.gz.id b/happybase-0.8.tar.gz.id new file mode 100644 index 0000000..ba686c5 --- /dev/null +++ b/happybase-0.8.tar.gz.id @@ -0,0 +1 @@ +d4c26a7c9997afadc8c1465e4eadf7ebc317706b diff --git a/happybase-0.9.tar.gz.delta b/happybase-0.9.tar.gz.delta new file mode 100644 index 0000000..d9b2708 Binary files /dev/null and b/happybase-0.9.tar.gz.delta differ diff --git a/happybase-0.9.tar.gz.id 
b/happybase-0.9.tar.gz.id new file mode 100644 index 0000000..f09c804 --- /dev/null +++ b/happybase-0.9.tar.gz.id @@ -0,0 +1 @@ +2976f3f091393407394c18f5060af140a787df97 diff --git a/happybase/Hbase.thrift b/happybase/Hbase.thrift deleted file mode 100644 index e9836cd..0000000 --- a/happybase/Hbase.thrift +++ /dev/null @@ -1,975 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -// ---------------------------------------------------------------- -// Hbase.thrift -// -// This is a Thrift interface definition file for the Hbase service. -// Target language libraries for C++, Java, Ruby, PHP, (and more) are -// generated by running this file through the Thrift compiler with the -// appropriate flags. The Thrift compiler binary and runtime -// libraries for various languages are available -// from the Apache Incubator (http://incubator.apache.org/thrift/) -// -// See the package.html file for information on the version of Thrift -// used to generate the *.java files checked into the Hbase project. 
-// ---------------------------------------------------------------- - -namespace java org.apache.hadoop.hbase.thrift.generated -namespace cpp apache.hadoop.hbase.thrift -namespace rb Apache.Hadoop.Hbase.Thrift -namespace py hbase -namespace perl Hbase -namespace php Hbase -// -// Types -// - -// NOTE: all variables with the Text type are assumed to be correctly -// formatted UTF-8 strings. This is a programming language and locale -// dependent property that the client application is repsonsible for -// maintaining. If strings with an invalid encoding are sent, an -// IOError will be thrown. - -typedef binary Text -typedef binary Bytes -typedef i32 ScannerID - -/** - * TCell - Used to transport a cell value (byte[]) and the timestamp it was - * stored with together as a result for get and getRow methods. This promotes - * the timestamp of a cell to a first-class value, making it easy to take - * note of temporal data. Cell is used all the way from HStore up to HTable. - */ -struct TCell{ - 1:Bytes value, - 2:i64 timestamp -} - -/** - * An HColumnDescriptor contains information about a column family - * such as the number of versions, compression settings, etc. It is - * used as input when creating a table or adding a column. - */ -struct ColumnDescriptor { - 1:Text name, - 2:i32 maxVersions = 3, - 3:string compression = "NONE", - 4:bool inMemory = 0, - 5:string bloomFilterType = "NONE", - 6:i32 bloomFilterVectorSize = 0, - 7:i32 bloomFilterNbHashes = 0, - 8:bool blockCacheEnabled = 0, - 9:i32 timeToLive = -1 -} - -/** - * A TRegionInfo contains information about an HTable region. - */ -struct TRegionInfo { - 1:Text startKey, - 2:Text endKey, - 3:i64 id, - 4:Text name, - 5:byte version, - 6:Text serverName, - 7:i32 port -} - -/** - * A Mutation object is used to either update or delete a column-value. 
- */ -struct Mutation { - 1:bool isDelete = 0, - 2:Text column, - 3:Text value, - 4:bool writeToWAL = 1 -} - - -/** - * A BatchMutation object is used to apply a number of Mutations to a single row. - */ -struct BatchMutation { - 1:Text row, - 2:list mutations -} - -/** - * For increments that are not incrementColumnValue - * equivalents. - */ -struct TIncrement { - 1:Text table, - 2:Text row, - 3:Text column, - 4:i64 ammount -} - -/** - * Holds column name and the cell. - */ -struct TColumn { - 1:Text columnName, - 2:TCell cell - } - -/** - * Holds row name and then a map of columns to cells. - */ -struct TRowResult { - 1:Text row, - 2:optional map columns, - 3:optional list sortedColumns -} - -/** - * A Scan object is used to specify scanner parameters when opening a scanner. - */ -struct TScan { - 1:optional Text startRow, - 2:optional Text stopRow, - 3:optional i64 timestamp, - 4:optional list columns, - 5:optional i32 caching, - 6:optional Text filterString, - 7:optional i32 batchSize, - 8:optional bool sortColumns, - 9:optional bool reversed -} - -/** - * An Append object is used to specify the parameters for performing the append operation. - */ -struct TAppend { - 1:Text table, - 2:Text row, - 3:list columns, - 4:list values -} - -// -// Exceptions -// -/** - * An IOError exception signals that an error occurred communicating - * to the Hbase master or an Hbase region server. Also used to return - * more general Hbase error conditions. - */ -exception IOError { - 1:string message -} - -/** - * An IllegalArgument exception indicates an illegal or invalid - * argument was passed into a procedure. 
- */ -exception IllegalArgument { - 1:string message -} - -/** - * An AlreadyExists exceptions signals that a table with the specified - * name already exists - */ -exception AlreadyExists { - 1:string message -} - -// -// Service -// - -service Hbase { - /** - * Brings a table on-line (enables it) - */ - void enableTable( - /** name of the table */ - 1:Bytes tableName - ) throws (1:IOError io) - - /** - * Disables a table (takes it off-line) If it is being served, the master - * will tell the servers to stop serving it. - */ - void disableTable( - /** name of the table */ - 1:Bytes tableName - ) throws (1:IOError io) - - /** - * @return true if table is on-line - */ - bool isTableEnabled( - /** name of the table to check */ - 1:Bytes tableName - ) throws (1:IOError io) - - void compact(1:Bytes tableNameOrRegionName) - throws (1:IOError io) - - void majorCompact(1:Bytes tableNameOrRegionName) - throws (1:IOError io) - - /** - * List all the userspace tables. - * - * @return returns a list of names - */ - list getTableNames() - throws (1:IOError io) - - /** - * List all the column families assoicated with a table. - * - * @return list of column family descriptors - */ - map getColumnDescriptors ( - /** table name */ - 1:Text tableName - ) throws (1:IOError io) - - /** - * List the regions associated with a table. - * - * @return list of region descriptors - */ - list getTableRegions( - /** table name */ - 1:Text tableName) - throws (1:IOError io) - - /** - * Create a table with the specified column families. The name - * field for each ColumnDescriptor must be set and must end in a - * colon (:). All other fields are optional and will get default - * values if not explicitly specified. 
- * - * @throws IllegalArgument if an input parameter is invalid - * - * @throws AlreadyExists if the table name already exists - */ - void createTable( - /** name of table to create */ - 1:Text tableName, - - /** list of column family descriptors */ - 2:list columnFamilies - ) throws (1:IOError io, 2:IllegalArgument ia, 3:AlreadyExists exist) - - /** - * Deletes a table - * - * @throws IOError if table doesn't exist on server or there was some other - * problem - */ - void deleteTable( - /** name of table to delete */ - 1:Text tableName - ) throws (1:IOError io) - - /** - * Get a single TCell for the specified table, row, and column at the - * latest timestamp. Returns an empty list if no such value exists. - * - * @return value for specified row/column - */ - list get( - /** name of table */ - 1:Text tableName, - - /** row key */ - 2:Text row, - - /** column name */ - 3:Text column, - - /** Get attributes */ - 4:map attributes - ) throws (1:IOError io) - - /** - * Get the specified number of versions for the specified table, - * row, and column. - * - * @return list of cells for specified row/column - */ - list getVer( - /** name of table */ - 1:Text tableName, - - /** row key */ - 2:Text row, - - /** column name */ - 3:Text column, - - /** number of versions to retrieve */ - 4:i32 numVersions, - - /** Get attributes */ - 5:map attributes - ) throws (1:IOError io) - - /** - * Get the specified number of versions for the specified table, - * row, and column. Only versions less than or equal to the specified - * timestamp will be returned. 
- * - * @return list of cells for specified row/column - */ - list getVerTs( - /** name of table */ - 1:Text tableName, - - /** row key */ - 2:Text row, - - /** column name */ - 3:Text column, - - /** timestamp */ - 4:i64 timestamp, - - /** number of versions to retrieve */ - 5:i32 numVersions, - - /** Get attributes */ - 6:map attributes - ) throws (1:IOError io) - - /** - * Get all the data for the specified table and row at the latest - * timestamp. Returns an empty list if the row does not exist. - * - * @return TRowResult containing the row and map of columns to TCells - */ - list getRow( - /** name of table */ - 1:Text tableName, - - /** row key */ - 2:Text row, - - /** Get attributes */ - 3:map attributes - ) throws (1:IOError io) - - /** - * Get the specified columns for the specified table and row at the latest - * timestamp. Returns an empty list if the row does not exist. - * - * @return TRowResult containing the row and map of columns to TCells - */ - list getRowWithColumns( - /** name of table */ - 1:Text tableName, - - /** row key */ - 2:Text row, - - /** List of columns to return, null for all columns */ - 3:list columns, - - /** Get attributes */ - 4:map attributes - ) throws (1:IOError io) - - /** - * Get all the data for the specified table and row at the specified - * timestamp. Returns an empty list if the row does not exist. - * - * @return TRowResult containing the row and map of columns to TCells - */ - list getRowTs( - /** name of the table */ - 1:Text tableName, - - /** row key */ - 2:Text row, - - /** timestamp */ - 3:i64 timestamp, - - /** Get attributes */ - 4:map attributes - ) throws (1:IOError io) - - /** - * Get the specified columns for the specified table and row at the specified - * timestamp. Returns an empty list if the row does not exist. 
- * - * @return TRowResult containing the row and map of columns to TCells - */ - list getRowWithColumnsTs( - /** name of table */ - 1:Text tableName, - - /** row key */ - 2:Text row, - - /** List of columns to return, null for all columns */ - 3:list columns, - 4:i64 timestamp, - - /** Get attributes */ - 5:map attributes - ) throws (1:IOError io) - - /** - * Get all the data for the specified table and rows at the latest - * timestamp. Returns an empty list if no rows exist. - * - * @return TRowResult containing the rows and map of columns to TCells - */ - list getRows( - /** name of table */ - 1:Text tableName, - - /** row keys */ - 2:list rows - - /** Get attributes */ - 3:map attributes - ) throws (1:IOError io) - - /** - * Get the specified columns for the specified table and rows at the latest - * timestamp. Returns an empty list if no rows exist. - * - * @return TRowResult containing the rows and map of columns to TCells - */ - list getRowsWithColumns( - /** name of table */ - 1:Text tableName, - - /** row keys */ - 2:list rows, - - /** List of columns to return, null for all columns */ - 3:list columns, - - /** Get attributes */ - 4:map attributes - ) throws (1:IOError io) - - /** - * Get all the data for the specified table and rows at the specified - * timestamp. Returns an empty list if no rows exist. - * - * @return TRowResult containing the rows and map of columns to TCells - */ - list getRowsTs( - /** name of the table */ - 1:Text tableName, - - /** row keys */ - 2:list rows - - /** timestamp */ - 3:i64 timestamp, - - /** Get attributes */ - 4:map attributes - ) throws (1:IOError io) - - /** - * Get the specified columns for the specified table and rows at the specified - * timestamp. Returns an empty list if no rows exist. 
- * - * @return TRowResult containing the rows and map of columns to TCells - */ - list getRowsWithColumnsTs( - /** name of table */ - 1:Text tableName, - - /** row keys */ - 2:list rows - - /** List of columns to return, null for all columns */ - 3:list columns, - 4:i64 timestamp, - - /** Get attributes */ - 5:map attributes - ) throws (1:IOError io) - - /** - * Apply a series of mutations (updates/deletes) to a row in a - * single transaction. If an exception is thrown, then the - * transaction is aborted. Default current timestamp is used, and - * all entries will have an identical timestamp. - */ - void mutateRow( - /** name of table */ - 1:Text tableName, - - /** row key */ - 2:Text row, - - /** list of mutation commands */ - 3:list mutations, - - /** Mutation attributes */ - 4:map attributes - ) throws (1:IOError io, 2:IllegalArgument ia) - - /** - * Apply a series of mutations (updates/deletes) to a row in a - * single transaction. If an exception is thrown, then the - * transaction is aborted. The specified timestamp is used, and - * all entries will have an identical timestamp. - */ - void mutateRowTs( - /** name of table */ - 1:Text tableName, - - /** row key */ - 2:Text row, - - /** list of mutation commands */ - 3:list mutations, - - /** timestamp */ - 4:i64 timestamp, - - /** Mutation attributes */ - 5:map attributes - ) throws (1:IOError io, 2:IllegalArgument ia) - - /** - * Apply a series of batches (each a series of mutations on a single row) - * in a single transaction. If an exception is thrown, then the - * transaction is aborted. Default current timestamp is used, and - * all entries will have an identical timestamp. - */ - void mutateRows( - /** name of table */ - 1:Text tableName, - - /** list of row batches */ - 2:list rowBatches, - - /** Mutation attributes */ - 3:map attributes - ) throws (1:IOError io, 2:IllegalArgument ia) - - /** - * Apply a series of batches (each a series of mutations on a single row) - * in a single transaction. 
If an exception is thrown, then the - * transaction is aborted. The specified timestamp is used, and - * all entries will have an identical timestamp. - */ - void mutateRowsTs( - /** name of table */ - 1:Text tableName, - - /** list of row batches */ - 2:list rowBatches, - - /** timestamp */ - 3:i64 timestamp, - - /** Mutation attributes */ - 4:map attributes - ) throws (1:IOError io, 2:IllegalArgument ia) - - /** - * Atomically increment the column value specified. Returns the next value post increment. - */ - i64 atomicIncrement( - /** name of table */ - 1:Text tableName, - - /** row to increment */ - 2:Text row, - - /** name of column */ - 3:Text column, - - /** amount to increment by */ - 4:i64 value - ) throws (1:IOError io, 2:IllegalArgument ia) - - /** - * Delete all cells that match the passed row and column. - */ - void deleteAll( - /** name of table */ - 1:Text tableName, - - /** Row to update */ - 2:Text row, - - /** name of column whose value is to be deleted */ - 3:Text column, - - /** Delete attributes */ - 4:map attributes - ) throws (1:IOError io) - - /** - * Delete all cells that match the passed row and column and whose - * timestamp is equal-to or older than the passed timestamp. - */ - void deleteAllTs( - /** name of table */ - 1:Text tableName, - - /** Row to update */ - 2:Text row, - - /** name of column whose value is to be deleted */ - 3:Text column, - - /** timestamp */ - 4:i64 timestamp, - - /** Delete attributes */ - 5:map attributes - ) throws (1:IOError io) - - /** - * Completely delete the row's cells. - */ - void deleteAllRow( - /** name of table */ - 1:Text tableName, - - /** key of the row to be completely deleted. */ - 2:Text row, - - /** Delete attributes */ - 3:map attributes - ) throws (1:IOError io) - - /** - * Increment a cell by the ammount. - * Increments can be applied async if hbase.regionserver.thrift.coalesceIncrement is set to true. - * False is the default. 
Turn to true if you need the extra performance and can accept some - * data loss if a thrift server dies with increments still in the queue. - */ - void increment( - /** The single increment to apply */ - 1:TIncrement increment - ) throws (1:IOError io) - - - void incrementRows( - /** The list of increments */ - 1:list increments - ) throws (1:IOError io) - - /** - * Completely delete the row's cells marked with a timestamp - * equal-to or older than the passed timestamp. - */ - void deleteAllRowTs( - /** name of table */ - 1:Text tableName, - - /** key of the row to be completely deleted. */ - 2:Text row, - - /** timestamp */ - 3:i64 timestamp, - - /** Delete attributes */ - 4:map attributes - ) throws (1:IOError io) - - /** - * Get a scanner on the current table, using the Scan instance - * for the scan parameters. - */ - ScannerID scannerOpenWithScan( - /** name of table */ - 1:Text tableName, - - /** Scan instance */ - 2:TScan scan, - - /** Scan attributes */ - 3:map attributes - ) throws (1:IOError io) - - /** - * Get a scanner on the current table starting at the specified row and - * ending at the last row in the table. Return the specified columns. - * - * @return scanner id to be used with other scanner procedures - */ - ScannerID scannerOpen( - /** name of table */ - 1:Text tableName, - - /** - * Starting row in table to scan. - * Send "" (empty string) to start at the first row. - */ - 2:Text startRow, - - /** - * columns to scan. If column name is a column family, all - * columns of the specified column family are returned. It's also possible - * to pass a regex in the column qualifier. - */ - 3:list columns, - - /** Scan attributes */ - 4:map attributes - ) throws (1:IOError io) - - /** - * Get a scanner on the current table starting and stopping at the - * specified rows. ending at the last row in the table. Return the - * specified columns. 
- * - * @return scanner id to be used with other scanner procedures - */ - ScannerID scannerOpenWithStop( - /** name of table */ - 1:Text tableName, - - /** - * Starting row in table to scan. - * Send "" (empty string) to start at the first row. - */ - 2:Text startRow, - - /** - * row to stop scanning on. This row is *not* included in the - * scanner's results - */ - 3:Text stopRow, - - /** - * columns to scan. If column name is a column family, all - * columns of the specified column family are returned. It's also possible - * to pass a regex in the column qualifier. - */ - 4:list columns, - - /** Scan attributes */ - 5:map attributes - ) throws (1:IOError io) - - /** - * Open a scanner for a given prefix. That is all rows will have the specified - * prefix. No other rows will be returned. - * - * @return scanner id to use with other scanner calls - */ - ScannerID scannerOpenWithPrefix( - /** name of table */ - 1:Text tableName, - - /** the prefix (and thus start row) of the keys you want */ - 2:Text startAndPrefix, - - /** the columns you want returned */ - 3:list columns, - - /** Scan attributes */ - 4:map attributes - ) throws (1:IOError io) - - /** - * Get a scanner on the current table starting at the specified row and - * ending at the last row in the table. Return the specified columns. - * Only values with the specified timestamp are returned. - * - * @return scanner id to be used with other scanner procedures - */ - ScannerID scannerOpenTs( - /** name of table */ - 1:Text tableName, - - /** - * Starting row in table to scan. - * Send "" (empty string) to start at the first row. - */ - 2:Text startRow, - - /** - * columns to scan. If column name is a column family, all - * columns of the specified column family are returned. It's also possible - * to pass a regex in the column qualifier. 
- */ - 3:list columns, - - /** timestamp */ - 4:i64 timestamp, - - /** Scan attributes */ - 5:map attributes - ) throws (1:IOError io) - - /** - * Get a scanner on the current table starting and stopping at the - * specified rows. ending at the last row in the table. Return the - * specified columns. Only values with the specified timestamp are - * returned. - * - * @return scanner id to be used with other scanner procedures - */ - ScannerID scannerOpenWithStopTs( - /** name of table */ - 1:Text tableName, - - /** - * Starting row in table to scan. - * Send "" (empty string) to start at the first row. - */ - 2:Text startRow, - - /** - * row to stop scanning on. This row is *not* included in the - * scanner's results - */ - 3:Text stopRow, - - /** - * columns to scan. If column name is a column family, all - * columns of the specified column family are returned. It's also possible - * to pass a regex in the column qualifier. - */ - 4:list columns, - - /** timestamp */ - 5:i64 timestamp, - - /** Scan attributes */ - 6:map attributes - ) throws (1:IOError io) - - /** - * Returns the scanner's current row value and advances to the next - * row in the table. When there are no more rows in the table, or a key - * greater-than-or-equal-to the scanner's specified stopRow is reached, - * an empty list is returned. - * - * @return a TRowResult containing the current row and a map of the columns to TCells. - * - * @throws IllegalArgument if ScannerID is invalid - * - * @throws NotFound when the scanner reaches the end - */ - list scannerGet( - /** id of a scanner returned by scannerOpen */ - 1:ScannerID id - ) throws (1:IOError io, 2:IllegalArgument ia) - - /** - * Returns, starting at the scanner's current row value nbRows worth of - * rows and advances to the next row in the table. When there are no more - * rows in the table, or a key greater-than-or-equal-to the scanner's - * specified stopRow is reached, an empty list is returned. 
- * - * @return a TRowResult containing the current row and a map of the columns to TCells. - * - * @throws IllegalArgument if ScannerID is invalid - * - * @throws NotFound when the scanner reaches the end - */ - list scannerGetList( - /** id of a scanner returned by scannerOpen */ - 1:ScannerID id, - - /** number of results to return */ - 2:i32 nbRows - ) throws (1:IOError io, 2:IllegalArgument ia) - - /** - * Closes the server-state associated with an open scanner. - * - * @throws IllegalArgument if ScannerID is invalid - */ - void scannerClose( - /** id of a scanner returned by scannerOpen */ - 1:ScannerID id - ) throws (1:IOError io, 2:IllegalArgument ia) - - /** - * Get the row just before the specified one. - * - * @return value for specified row/column - */ - list getRowOrBefore( - /** name of table */ - 1:Text tableName, - - /** row key */ - 2:Text row, - - /** column name */ - 3:Text family - ) throws (1:IOError io) - - /** - * Get the regininfo for the specified row. It scans - * the metatable to find region's start and end keys. - * - * @return value for specified row/column - */ - TRegionInfo getRegionInfo( - /** row key */ - 1:Text row, - - ) throws (1:IOError io) - - /** - * Appends values to one or more columns within a single row. - * - * @return values of columns after the append operation. - */ - list append( - /** The single append operation to apply */ - 1:TAppend append, - - ) throws (1:IOError io) - - /** - * Atomically checks if a row/family/qualifier value matches the expected - * value. If it does, it adds the corresponding mutation operation for put. 
- * - * @return true if the new put was executed, false otherwise - */ - bool checkAndPut( - /** name of table */ - 1:Text tableName, - - /** row key */ - 2:Text row, - - /** column name */ - 3:Text column, - - /** the expected value for the column parameter, if not - provided the check is for the non-existence of the - column in question */ - 5:Text value - - /** mutation for the put */ - 6:Mutation mput, - - /** Mutation attributes */ - 7:map attributes - ) throws (1:IOError io, 2:IllegalArgument ia) -} diff --git a/happybase/__init__.py b/happybase/__init__.py deleted file mode 100644 index dfca2a7..0000000 --- a/happybase/__init__.py +++ /dev/null @@ -1,18 +0,0 @@ -""" -HappyBase, a developer-friendly Python library to interact with Apache -HBase. -""" - -import importlib_resources as _importlib_resources -import thriftpy2 as _thriftpy - -_thriftpy.load( - str(_importlib_resources.files('happybase') / 'Hbase.thrift'), - 'Hbase_thrift') - -from ._version import __version__ # noqa - -from .connection import DEFAULT_HOST, DEFAULT_PORT, Connection # noqa -from .table import Table # noqa -from .batch import Batch # noqa -from .pool import ConnectionPool, NoConnectionsAvailable # noqa diff --git a/happybase/_version.py b/happybase/_version.py deleted file mode 100644 index 3f5e449..0000000 --- a/happybase/_version.py +++ /dev/null @@ -1,8 +0,0 @@ -""" -HappyBase version module. - -This module defines the package version for use in __init__.py and -setup.py. -""" - -__version__ = '1.3.0' diff --git a/happybase/batch.py b/happybase/batch.py deleted file mode 100644 index eec63e7..0000000 --- a/happybase/batch.py +++ /dev/null @@ -1,137 +0,0 @@ -""" -HappyBase Batch module. -""" - -from collections import defaultdict -import logging -from numbers import Integral - -import six - -from Hbase_thrift import BatchMutation, Mutation - -logger = logging.getLogger(__name__) - - -class Batch(object): - """Batch mutation class. 
- - This class cannot be instantiated directly; use :py:meth:`Table.batch` - instead. - """ - def __init__(self, table, timestamp=None, batch_size=None, - transaction=False, wal=True): - """Initialise a new Batch instance.""" - if not (timestamp is None or isinstance(timestamp, Integral)): - raise TypeError("'timestamp' must be an integer or None") - - if batch_size is not None: - if transaction: - raise TypeError("'transaction' cannot be used when " - "'batch_size' is specified") - if not batch_size > 0: - raise ValueError("'batch_size' must be > 0") - - self._table = table - self._batch_size = batch_size - self._timestamp = timestamp - self._transaction = transaction - self._wal = wal - self._families = None - self._reset_mutations() - - def _reset_mutations(self): - """Reset the internal mutation buffer.""" - self._mutations = defaultdict(list) - self._mutation_count = 0 - - def send(self): - """Send the batch to the server.""" - bms = [ - BatchMutation(row, m) - for row, m in six.iteritems(self._mutations) - ] - if not bms: - return - - logger.debug("Sending batch for '%s' (%d mutations on %d rows)", - self._table.name, self._mutation_count, len(bms)) - if self._timestamp is None: - self._table.connection.client.mutateRows(self._table.name, bms, {}) - else: - self._table.connection.client.mutateRowsTs( - self._table.name, bms, self._timestamp, {}) - - self._reset_mutations() - - # - # Mutation methods - # - - def put(self, row, data, wal=None): - """Store data in the table. - - See :py:meth:`Table.put` for a description of the `row`, `data`, - and `wal` arguments. The `wal` argument should normally not be - used; its only use is to override the batch-wide value passed to - :py:meth:`Table.batch`. 
- """ - if wal is None: - wal = self._wal - - self._mutations[row].extend( - Mutation( - isDelete=False, - column=column, - value=value, - writeToWAL=wal) - for column, value in six.iteritems(data)) - - self._mutation_count += len(data) - if self._batch_size and self._mutation_count >= self._batch_size: - self.send() - - def delete(self, row, columns=None, wal=None): - """Delete data from the table. - - See :py:meth:`Table.put` for a description of the `row`, `data`, - and `wal` arguments. The `wal` argument should normally not be - used; its only use is to override the batch-wide value passed to - :py:meth:`Table.batch`. - """ - # Work-around Thrift API limitation: the mutation API can only - # delete specified columns, not complete rows, so just list the - # column families once and cache them for later use by the same - # batch instance. - if columns is None: - if self._families is None: - self._families = self._table._column_family_names() - columns = self._families - - if wal is None: - wal = self._wal - - self._mutations[row].extend( - Mutation(isDelete=True, column=column, writeToWAL=wal) - for column in columns) - - self._mutation_count += len(columns) - if self._batch_size and self._mutation_count >= self._batch_size: - self.send() - - # - # Context manager methods - # - - def __enter__(self): - """Called upon entering a ``with`` block""" - return self - - def __exit__(self, exc_type, exc_value, traceback): - """Called upon exiting a ``with`` block""" - # If the 'with' block raises an exception, the batch will not be - # sent to the server. - if self._transaction and exc_type is not None: - return - - self.send() diff --git a/happybase/connection.py b/happybase/connection.py deleted file mode 100644 index 6852bbb..0000000 --- a/happybase/connection.py +++ /dev/null @@ -1,367 +0,0 @@ -# coding: UTF-8 - -""" -HappyBase connection module. 
-""" - -import logging - -import six -from thriftpy2.thrift import TClient -from thriftpy2.transport import TBufferedTransport, TFramedTransport, TSocket -from thriftpy2.protocol import TBinaryProtocol, TCompactProtocol - -from Hbase_thrift import Hbase, ColumnDescriptor - -from .table import Table -from .util import ensure_bytes, pep8_to_camel_case - -logger = logging.getLogger(__name__) - -STRING_OR_BINARY = (six.binary_type, six.text_type) - -COMPAT_MODES = ('0.90', '0.92', '0.94', '0.96', '0.98') -THRIFT_TRANSPORTS = dict( - buffered=TBufferedTransport, - framed=TFramedTransport, -) -THRIFT_PROTOCOLS = dict( - binary=TBinaryProtocol, - compact=TCompactProtocol, -) - -DEFAULT_HOST = 'localhost' -DEFAULT_PORT = 9090 -DEFAULT_TRANSPORT = 'buffered' -DEFAULT_COMPAT = '0.98' -DEFAULT_PROTOCOL = 'binary' - - -class Connection(object): - """Connection to an HBase Thrift server. - - The `host` and `port` arguments specify the host name and TCP port - of the HBase Thrift server to connect to. If omitted or ``None``, - a connection to the default port on ``localhost`` is made. If - specified, the `timeout` argument specifies the socket timeout in - milliseconds. - - If `autoconnect` is `True` (the default) the connection is made - directly, otherwise :py:meth:`Connection.open` must be called - explicitly before first use. - - The optional `table_prefix` and `table_prefix_separator` arguments - specify a prefix and a separator string to be prepended to all table - names, e.g. when :py:meth:`Connection.table` is invoked. For - example, if `table_prefix` is ``myproject``, all tables will - have names like ``myproject_XYZ``. - - The optional `compat` argument sets the compatibility level for - this connection. Older HBase versions have slightly different Thrift - interfaces, and using the wrong protocol can lead to crashes caused - by communication errors, so make sure to use the correct one. 
This - value can be either the string ``0.90``, ``0.92``, ``0.94``, or - ``0.96`` (the default). - - The optional `transport` argument specifies the Thrift transport - mode to use. Supported values for this argument are ``buffered`` - (the default) and ``framed``. Make sure to choose the right one, - since otherwise you might see non-obvious connection errors or - program hangs when making a connection. HBase versions before 0.94 - always use the buffered transport. Starting with HBase 0.94, the - Thrift server optionally uses a framed transport, depending on the - argument passed to the ``hbase-daemon.sh start thrift`` command. - The default ``-threadpool`` mode uses the buffered transport; the - ``-hsha``, ``-nonblocking``, and ``-threadedselector`` modes use the - framed transport. - - The optional `protocol` argument specifies the Thrift transport - protocol to use. Supported values for this argument are ``binary`` - (the default) and ``compact``. Make sure to choose the right one, - since otherwise you might see non-obvious connection errors or - program hangs when making a connection. ``TCompactProtocol`` is - a more compact binary format that is typically more efficient to - process as well. ``TBinaryProtocol`` is the default protocol that - Happybase uses. - - .. versionadded:: 0.9 - `protocol` argument - - .. versionadded:: 0.5 - `timeout` argument - - .. versionadded:: 0.4 - `table_prefix_separator` argument - - .. 
versionadded:: 0.4 - support for framed Thrift transports - - :param str host: The host to connect to - :param int port: The port to connect to - :param int timeout: The socket timeout in milliseconds (optional) - :param bool autoconnect: Whether the connection should be opened directly - :param str table_prefix: Prefix used to construct table names (optional) - :param str table_prefix_separator: Separator used for `table_prefix` - :param str compat: Compatibility mode (optional) - :param str transport: Thrift transport mode (optional) - """ - def __init__(self, host=DEFAULT_HOST, port=DEFAULT_PORT, timeout=None, - autoconnect=True, table_prefix=None, - table_prefix_separator=b'_', compat=DEFAULT_COMPAT, - transport=DEFAULT_TRANSPORT, protocol=DEFAULT_PROTOCOL): - - if transport not in THRIFT_TRANSPORTS: - raise ValueError("'transport' must be one of %s" - % ", ".join(THRIFT_TRANSPORTS.keys())) - - if table_prefix is not None: - if not isinstance(table_prefix, STRING_OR_BINARY): - raise TypeError("'table_prefix' must be a string") - table_prefix = ensure_bytes(table_prefix) - - if not isinstance(table_prefix_separator, STRING_OR_BINARY): - raise TypeError("'table_prefix_separator' must be a string") - table_prefix_separator = ensure_bytes(table_prefix_separator) - - if compat not in COMPAT_MODES: - raise ValueError("'compat' must be one of %s" - % ", ".join(COMPAT_MODES)) - - if protocol not in THRIFT_PROTOCOLS: - raise ValueError("'protocol' must be one of %s" - % ", ".join(THRIFT_PROTOCOLS)) - - # Allow host and port to be None, which may be easier for - # applications wrapping a Connection instance. 
- self.host = host or DEFAULT_HOST - self.port = port or DEFAULT_PORT - self.timeout = timeout - self.table_prefix = table_prefix - self.table_prefix_separator = table_prefix_separator - self.compat = compat - - self._transport_class = THRIFT_TRANSPORTS[transport] - self._protocol_class = THRIFT_PROTOCOLS[protocol] - self._refresh_thrift_client() - - if autoconnect: - self.open() - - self._initialized = True - - def _refresh_thrift_client(self): - """Refresh the Thrift socket, transport, and client.""" - socket = TSocket(host=self.host, port=self.port, socket_timeout=self.timeout) - - self.transport = self._transport_class(socket) - protocol = self._protocol_class(self.transport, decode_response=False) - self.client = TClient(Hbase, protocol) - - def _table_name(self, name): - """Construct a table name by optionally adding a table name prefix.""" - name = ensure_bytes(name) - if self.table_prefix is None: - return name - return self.table_prefix + self.table_prefix_separator + name - - def open(self): - """Open the underlying transport to the HBase instance. - - This method opens the underlying Thrift transport (TCP connection). - """ - if self.transport.is_open(): - return - - logger.debug("Opening Thrift transport to %s:%d", self.host, self.port) - self.transport.open() - - def close(self): - """Close the underlying transport to the HBase instance. - - This method closes the underlying Thrift transport (TCP connection). - """ - if not self.transport.is_open(): - return - - if logger is not None: - # If called from __del__(), module variables may no longer - # exist. - logger.debug( - "Closing Thrift transport to %s:%d", - self.host, self.port) - - self.transport.close() - - def __del__(self): - try: - self._initialized - except AttributeError: - # Failure from constructor - return - else: - self.close() - - def table(self, name, use_prefix=True): - """Return a table object. - - Returns a :py:class:`happybase.Table` instance for the table - named `name`. 
This does not result in a round-trip to the - server, and the table is not checked for existence. - - The optional `use_prefix` argument specifies whether the table - prefix (if any) is prepended to the specified `name`. Set this - to `False` if you want to use a table that resides in another - ‘prefix namespace’, e.g. a table from a ‘friendly’ application - co-hosted on the same HBase instance. See the `table_prefix` - argument to the :py:class:`Connection` constructor for more - information. - - :param str name: the name of the table - :param bool use_prefix: whether to use the table prefix (if any) - :return: Table instance - :rtype: :py:class:`Table` - """ - name = ensure_bytes(name) - if use_prefix: - name = self._table_name(name) - return Table(name, self) - - # - # Table administration and maintenance - # - - def tables(self): - """Return a list of table names available in this HBase instance. - - If a `table_prefix` was set for this :py:class:`Connection`, only - tables that have the specified prefix will be listed. - - :return: The table names - :rtype: List of strings - """ - names = self.client.getTableNames() - - # Filter using prefix, and strip prefix from names - if self.table_prefix is not None: - prefix = self._table_name(b'') - offset = len(prefix) - names = [n[offset:] for n in names if n.startswith(prefix)] - - return names - - def create_table(self, name, families): - """Create a table. - - :param str name: The table name - :param dict families: The name and options for each column family - - The `families` argument is a dictionary mapping column family - names to a dictionary containing the options for this column - family, e.g. 
- - :: - - families = { - 'cf1': dict(max_versions=10), - 'cf2': dict(max_versions=1, block_cache_enabled=False), - 'cf3': dict(), # use defaults - } - connection.create_table('mytable', families) - - These options correspond to the ColumnDescriptor structure in - the Thrift API, but note that the names should be provided in - Python style, not in camel case notation, e.g. `time_to_live`, - not `timeToLive`. The following options are supported: - - * ``max_versions`` (`int`) - * ``compression`` (`str`) - * ``in_memory`` (`bool`) - * ``bloom_filter_type`` (`str`) - * ``bloom_filter_vector_size`` (`int`) - * ``bloom_filter_nb_hashes`` (`int`) - * ``block_cache_enabled`` (`bool`) - * ``time_to_live`` (`int`) - """ - name = self._table_name(name) - if not isinstance(families, dict): - raise TypeError("'families' arg must be a dictionary") - - if not families: - raise ValueError( - "Cannot create table %r (no column families specified)" - % name) - - column_descriptors = [] - for cf_name, options in six.iteritems(families): - if options is None: - options = dict() - - kwargs = dict() - for option_name, value in six.iteritems(options): - kwargs[pep8_to_camel_case(option_name)] = value - - if not cf_name.endswith(':'): - cf_name += ':' - kwargs['name'] = cf_name - - column_descriptors.append(ColumnDescriptor(**kwargs)) - - self.client.createTable(name, column_descriptors) - - def delete_table(self, name, disable=False): - """Delete the specified table. - - .. versionadded:: 0.5 - `disable` argument - - In HBase, a table always needs to be disabled before it can be - deleted. If the `disable` argument is `True`, this method first - disables the table if it wasn't already and then deletes it. 
- - :param str name: The table name - :param bool disable: Whether to first disable the table if needed - """ - if disable and self.is_table_enabled(name): - self.disable_table(name) - - name = self._table_name(name) - self.client.deleteTable(name) - - def enable_table(self, name): - """Enable the specified table. - - :param str name: The table name - """ - name = self._table_name(name) - self.client.enableTable(name) - - def disable_table(self, name): - """Disable the specified table. - - :param str name: The table name - """ - name = self._table_name(name) - self.client.disableTable(name) - - def is_table_enabled(self, name): - """Return whether the specified table is enabled. - - :param str name: The table name - - :return: whether the table is enabled - :rtype: bool - """ - name = self._table_name(name) - return self.client.isTableEnabled(name) - - def compact_table(self, name, major=False): - """Compact the specified table. - - :param str name: The table name - :param bool major: Whether to perform a major compaction. - """ - name = self._table_name(name) - if major: - self.client.majorCompact(name) - else: - self.client.compact(name) diff --git a/happybase/pool.py b/happybase/pool.py deleted file mode 100644 index 5a9bd90..0000000 --- a/happybase/pool.py +++ /dev/null @@ -1,158 +0,0 @@ -""" -HappyBase connection pool module. -""" - -import contextlib -import logging -import socket -import threading - -from six.moves import queue, range - -from thriftpy2.thrift import TException - -from .connection import Connection - -logger = logging.getLogger(__name__) - -# -# TODO: maybe support multiple Thrift servers. What would a reasonable -# distribution look like? Round-robin? Randomize the list upon -# instantiation and then cycle through it? How to handle (temporary?) -# connection errors? -# - - -class NoConnectionsAvailable(RuntimeError): - """ - Exception raised when no connections are available. 
- - This happens if a timeout was specified when obtaining a connection, - and no connection became available within the specified timeout. - - .. versionadded:: 0.5 - """ - pass - - -class ConnectionPool(object): - """ - Thread-safe connection pool. - - .. versionadded:: 0.5 - - The `size` argument specifies how many connections this pool - manages. Additional keyword arguments are passed unmodified to the - :py:class:`happybase.Connection` constructor, with the exception of - the `autoconnect` argument, since maintaining connections is the - task of the pool. - - :param int size: the maximum number of concurrently open connections - :param kwargs: keyword arguments passed to - :py:class:`happybase.Connection` - """ - def __init__(self, size, **kwargs): - if not isinstance(size, int): - raise TypeError("Pool 'size' arg must be an integer") - - if not size > 0: - raise ValueError("Pool 'size' arg must be greater than zero") - - logger.debug( - "Initializing connection pool with %d connections", size) - - self._lock = threading.Lock() - self._queue = queue.LifoQueue(maxsize=size) - self._thread_connections = threading.local() - - connection_kwargs = kwargs - connection_kwargs['autoconnect'] = False - - for i in range(size): - connection = Connection(**connection_kwargs) - self._queue.put(connection) - - # The first connection is made immediately so that trivial - # mistakes like unresolvable host names are raised immediately. - # Subsequent connections are connected lazily. 
- with self.connection(): - pass - - def _acquire_connection(self, timeout=None): - """Acquire a connection from the pool.""" - try: - return self._queue.get(True, timeout) - except queue.Empty: - raise NoConnectionsAvailable( - "No connection available from pool within specified " - "timeout") - - def _return_connection(self, connection): - """Return a connection to the pool.""" - self._queue.put(connection) - - @contextlib.contextmanager - def connection(self, timeout=None): - """ - Obtain a connection from the pool. - - This method *must* be used as a context manager, i.e. with - Python's ``with`` block. Example:: - - with pool.connection() as connection: - pass # do something with the connection - - If `timeout` is specified, this is the number of seconds to wait - for a connection to become available before - :py:exc:`NoConnectionsAvailable` is raised. If omitted, this - method waits forever for a connection to become available. - - :param int timeout: number of seconds to wait (optional) - :return: active connection from the pool - :rtype: :py:class:`happybase.Connection` - """ - - connection = getattr(self._thread_connections, 'current', None) - - return_after_use = False - if connection is None: - # This is the outermost connection requests for this thread. - # Obtain a new connection from the pool and keep a reference - # in a thread local so that nested connection requests from - # the same thread can return the same connection instance. - # - # Note: this code acquires a lock before assigning to the - # thread local; see - # http://emptysquare.net/blog/another-thing-about-pythons- - # threadlocals/ - return_after_use = True - connection = self._acquire_connection(timeout) - with self._lock: - self._thread_connections.current = connection - - try: - # Open connection, because connections are opened lazily. - # This is a no-op for connections that are already open. 
- connection.open() - - # Return value from the context manager's __enter__() - yield connection - - except (TException, socket.error): - # Refresh the underlying Thrift client if an exception - # occurred in the Thrift layer, since we don't know whether - # the connection is still usable. - logger.info("Replacing tainted pool connection") - connection._refresh_thrift_client() - connection.open() - - # Reraise to caller; see contextlib.contextmanager() docs - raise - - finally: - # Remove thread local reference after the outermost 'with' - # block ends. Afterwards the thread no longer owns the - # connection. - if return_after_use: - del self._thread_connections.current - self._return_connection(connection) diff --git a/happybase/table.py b/happybase/table.py deleted file mode 100644 index bc90c5d..0000000 --- a/happybase/table.py +++ /dev/null @@ -1,598 +0,0 @@ -""" -HappyBase table module. -""" - -import logging -from numbers import Integral -from struct import Struct - -from six import iteritems - -from Hbase_thrift import TScan - -from .util import thrift_type_to_dict, bytes_increment, OrderedDict -from .batch import Batch - -logger = logging.getLogger(__name__) - -pack_i64 = Struct('>q').pack - - -def make_row(cell_map, include_timestamp): - """Make a row dict for a cell mapping like ttypes.TRowResult.columns.""" - return { - name: (cell.value, cell.timestamp) if include_timestamp else cell.value - for name, cell in iteritems(cell_map) - } - - -def make_ordered_row(sorted_columns, include_timestamp): - """Make a row dict for sorted column results from scans.""" - od = OrderedDict() - for column in sorted_columns: - if include_timestamp: - value = (column.cell.value, column.cell.timestamp) - else: - value = column.cell.value - od[column.columnName] = value - return od - - -class Table(object): - """HBase table abstraction class. - - This class cannot be instantiated directly; use :py:meth:`Connection.table` - instead. 
- """ - def __init__(self, name, connection): - self.name = name - self.connection = connection - - def __repr__(self): - return '<%s.%s name=%r>' % ( - __name__, - self.__class__.__name__, - self.name, - ) - - def families(self): - """Retrieve the column families for this table. - - :return: Mapping from column family name to settings dict - :rtype: dict - """ - descriptors = self.connection.client.getColumnDescriptors(self.name) - families = dict() - for name, descriptor in descriptors.items(): - name = name.rstrip(b':') - families[name] = thrift_type_to_dict(descriptor) - return families - - def _column_family_names(self): - """Retrieve the column family names for this table (internal use)""" - names = self.connection.client.getColumnDescriptors(self.name).keys() - return [name.rstrip(b':') for name in names] - - def regions(self): - """Retrieve the regions for this table. - - :return: regions for this table - :rtype: list of dicts - """ - regions = self.connection.client.getTableRegions(self.name) - return [thrift_type_to_dict(r) for r in regions] - - # - # Data retrieval - # - - def row(self, row, columns=None, timestamp=None, include_timestamp=False): - """Retrieve a single row of data. - - This method retrieves the row with the row key specified in the `row` - argument and returns the columns and values for this row as - a dictionary. - - The `row` argument is the row key of the row. If the `columns` - argument is specified, only the values for these columns will be - returned instead of all available columns. The `columns` - argument should be a list or tuple containing byte strings. Each - name can be a column family, such as ``b'cf1'`` or ``b'cf1:'`` - (the trailing colon is not required), or a column family with a - qualifier, such as ``b'cf1:col1'``. - - If specified, the `timestamp` argument specifies the maximum version - that results may have. 
The `include_timestamp` argument specifies - whether cells are returned as single values or as `(value, timestamp)` - tuples. - - :param str row: the row key - :param list_or_tuple columns: list of columns (optional) - :param int timestamp: timestamp (optional) - :param bool include_timestamp: whether timestamps are returned - - :return: Mapping of columns (both qualifier and family) to values - :rtype: dict - """ - if columns is not None and not isinstance(columns, (tuple, list)): - raise TypeError("'columns' must be a tuple or list") - - if timestamp is None: - rows = self.connection.client.getRowWithColumns( - self.name, row, columns, {}) - else: - if not isinstance(timestamp, Integral): - raise TypeError("'timestamp' must be an integer") - rows = self.connection.client.getRowWithColumnsTs( - self.name, row, columns, timestamp, {}) - - if not rows: - return {} - - return make_row(rows[0].columns, include_timestamp) - - def rows(self, rows, columns=None, timestamp=None, - include_timestamp=False): - """Retrieve multiple rows of data. - - This method retrieves the rows with the row keys specified in the - `rows` argument, which should be a list (or tuple) of row - keys. The return value is a list of `(row_key, row_dict)` tuples. - - The `columns`, `timestamp` and `include_timestamp` arguments behave - exactly the same as for :py:meth:`row`. 
- - :param list rows: list of row keys - :param list_or_tuple columns: list of columns (optional) - :param int timestamp: timestamp (optional) - :param bool include_timestamp: whether timestamps are returned - - :return: List of mappings (columns to values) - :rtype: list of dicts - """ - if columns is not None and not isinstance(columns, (tuple, list)): - raise TypeError("'columns' must be a tuple or list") - - if not rows: - # Avoid round-trip if the result is empty anyway - return {} - - if timestamp is None: - results = self.connection.client.getRowsWithColumns( - self.name, rows, columns, {}) - else: - if not isinstance(timestamp, Integral): - raise TypeError("'timestamp' must be an integer") - - # Work-around a bug in the HBase Thrift server where the - # timestamp is only applied if columns are specified, at - # the cost of an extra round-trip. - if columns is None: - columns = self._column_family_names() - - results = self.connection.client.getRowsWithColumnsTs( - self.name, rows, columns, timestamp, {}) - - return [(r.row, make_row(r.columns, include_timestamp)) - for r in results] - - def cells(self, row, column, versions=None, timestamp=None, - include_timestamp=False): - """Retrieve multiple versions of a single cell from the table. - - This method retrieves multiple versions of a cell (if any). - - The `versions` argument defines how many cell versions to - retrieve at most. - - The `timestamp` and `include_timestamp` arguments behave exactly the - same as for :py:meth:`row`. 
- - :param str row: the row key - :param str column: the column name - :param int versions: the maximum number of versions to retrieve - :param int timestamp: timestamp (optional) - :param bool include_timestamp: whether timestamps are returned - - :return: cell values - :rtype: list of values - """ - if versions is None: - versions = (2 ** 31) - 1 # Thrift type is i32 - elif not isinstance(versions, int): - raise TypeError("'versions' argument must be a number or None") - elif versions < 1: - raise ValueError( - "'versions' argument must be at least 1 (or None)") - - if timestamp is None: - cells = self.connection.client.getVer( - self.name, row, column, versions, {}) - else: - if not isinstance(timestamp, Integral): - raise TypeError("'timestamp' must be an integer") - cells = self.connection.client.getVerTs( - self.name, row, column, timestamp, versions, {}) - - return [ - (c.value, c.timestamp) if include_timestamp else c.value - for c in cells - ] - - def scan(self, row_start=None, row_stop=None, row_prefix=None, - columns=None, filter=None, timestamp=None, - include_timestamp=False, batch_size=1000, scan_batching=None, - limit=None, sorted_columns=False, reverse=False): - """Create a scanner for data in the table. - - This method returns an iterable that can be used for looping over the - matching rows. Scanners can be created in two ways: - - * The `row_start` and `row_stop` arguments specify the row keys where - the scanner should start and stop. It does not matter whether the - table contains any rows with the specified keys: the first row after - `row_start` will be the first result, and the last row before - `row_stop` will be the last result. Note that the start of the range - is inclusive, while the end is exclusive. - - Both `row_start` and `row_stop` can be `None` to specify the start - and the end of the table respectively. If both are omitted, a full - table scan is done. Note that this usually results in severe - performance problems. 
- - * Alternatively, if `row_prefix` is specified, only rows with row keys - matching the prefix will be returned. If given, `row_start` and - `row_stop` cannot be used. - - The `columns`, `timestamp` and `include_timestamp` arguments behave - exactly the same as for :py:meth:`row`. - - The `filter` argument may be a filter string that will be applied at - the server by the region servers. - - If `limit` is given, at most `limit` results will be returned. - - The `batch_size` argument specifies how many results should be - retrieved per batch when retrieving results from the scanner. Only set - this to a low value (or even 1) if your data is large, since a low - batch size results in added round-trips to the server. - - The optional `scan_batching` is for advanced usage only; it - translates to `Scan.setBatching()` at the Java side (inside the - Thrift server). By setting this value rows may be split into - partial rows, so result rows may be incomplete, and the number - of results returned by te scanner may no longer correspond to - the number of rows matched by the scan. - - If `sorted_columns` is `True`, the columns in the rows returned - by this scanner will be retrieved in sorted order, and the data - will be stored in `OrderedDict` instances. - - If `reverse` is `True`, the scanner will perform the scan in reverse. - This means that `row_start` must be lexicographically after `row_stop`. - Note that the start of the range is inclusive, while the end is - exclusive just as in the forward scan. - - **Compatibility notes:** - - * The `filter` argument is only available when using HBase 0.92 - (or up). In HBase 0.90 compatibility mode, specifying - a `filter` raises an exception. - - * The `sorted_columns` argument is only available when using - HBase 0.96 (or up). - - * The `reverse` argument is only available when using HBase 0.98 - (or up). - - .. versionadded:: 1.1.0 - `reverse` argument - - .. versionadded:: 0.8 - `sorted_columns` argument - - .. 
versionadded:: 0.8 - `scan_batching` argument - - :param str row_start: the row key to start at (inclusive) - :param str row_stop: the row key to stop at (exclusive) - :param str row_prefix: a prefix of the row key that must match - :param list_or_tuple columns: list of columns (optional) - :param str filter: a filter string (optional) - :param int timestamp: timestamp (optional) - :param bool include_timestamp: whether timestamps are returned - :param int batch_size: batch size for retrieving results - :param bool scan_batching: server-side scan batching (optional) - :param int limit: max number of rows to return - :param bool sorted_columns: whether to return sorted columns - :param bool reverse: whether to perform scan in reverse - - :return: generator yielding the rows matching the scan - :rtype: iterable of `(row_key, row_data)` tuples - """ - if batch_size < 1: - raise ValueError("'batch_size' must be >= 1") - - if limit is not None and limit < 1: - raise ValueError("'limit' must be >= 1") - - if scan_batching is not None and scan_batching < 1: - raise ValueError("'scan_batching' must be >= 1") - - if sorted_columns and self.connection.compat < '0.96': - raise NotImplementedError( - "'sorted_columns' is only supported in HBase >= 0.96") - - if reverse and self.connection.compat < '0.98': - raise NotImplementedError( - "'reverse' is only supported in HBase >= 0.98") - - if row_prefix is not None: - if row_start is not None or row_stop is not None: - raise TypeError( - "'row_prefix' cannot be combined with 'row_start' " - "or 'row_stop'") - - if reverse: - row_start = bytes_increment(row_prefix) - row_stop = row_prefix - else: - row_start = row_prefix - row_stop = bytes_increment(row_prefix) - - if row_start is None: - row_start = '' - - if self.connection.compat == '0.90': - # The scannerOpenWithScan() Thrift function is not - # available, so work around it as much as possible with the - # other scannerOpen*() Thrift functions - - if filter is not None: - 
raise NotImplementedError( - "'filter' is not supported in HBase 0.90") - - if row_stop is None: - if timestamp is None: - scan_id = self.connection.client.scannerOpen( - self.name, row_start, columns, {}) - else: - scan_id = self.connection.client.scannerOpenTs( - self.name, row_start, columns, timestamp, {}) - else: - if timestamp is None: - scan_id = self.connection.client.scannerOpenWithStop( - self.name, row_start, row_stop, columns, {}) - else: - scan_id = self.connection.client.scannerOpenWithStopTs( - self.name, row_start, row_stop, columns, timestamp, {}) - - else: - # XXX: The "batch_size" can be slightly confusing to those - # familiar with the HBase Java API: - # - # * TScan.caching (Thrift API) translates to - # Scan.setCaching() (Java API) - # - # * TScan.batchSize (Thrift API) translates to - # Scan.setBatching (Java API) . - # - # However, we set Scan.setCaching() to what is called - # batch_size in the HappyBase API, so that the HTable on the - # Java side (inside the Thrift server) retrieves rows from - # the region servers in the same chunk sizes that it sends - # out again to Python (over Thrift). This cannot be tweaked - # (by design). - # - # The Scan.setBatching() value (Java API), which possibly - # cuts rows into multiple partial rows, can be set using the - # slightly strange name scan_batching. 
- scan = TScan( - startRow=row_start, - stopRow=row_stop, - timestamp=timestamp, - columns=columns, - caching=batch_size, - filterString=filter, - batchSize=scan_batching, - sortColumns=sorted_columns, - reversed=reverse, - ) - scan_id = self.connection.client.scannerOpenWithScan( - self.name, scan, {}) - - logger.debug("Opened scanner (id=%d) on '%s'", scan_id, self.name) - - n_returned = n_fetched = 0 - try: - while True: - if limit is None: - how_many = batch_size - else: - how_many = min(batch_size, limit - n_returned) - - items = self.connection.client.scannerGetList( - scan_id, how_many) - - if not items: - return # scan has finished - - n_fetched += len(items) - - for n_returned, item in enumerate(items, n_returned + 1): - if sorted_columns: - row = make_ordered_row(item.sortedColumns, - include_timestamp) - else: - row = make_row(item.columns, include_timestamp) - - yield item.row, row - - if limit is not None and n_returned == limit: - return # scan has finished - finally: - self.connection.client.scannerClose(scan_id) - logger.debug( - "Closed scanner (id=%d) on '%s' (%d returned, %d fetched)", - scan_id, self.name, n_returned, n_fetched) - - # - # Data manipulation - # - - def put(self, row, data, timestamp=None, wal=True): - """Store data in the table. - - This method stores the data in the `data` argument for the row - specified by `row`. The `data` argument is dictionary that maps columns - to values. Column names must include a family and qualifier part, e.g. - ``b'cf:col'``, though the qualifier part may be the empty string, e.g. - ``b'cf:'``. - - Note that, in many situations, :py:meth:`batch()` is a more appropriate - method to manipulate data. - - .. 
versionadded:: 0.7 - `wal` argument - - :param str row: the row key - :param dict data: the data to store - :param int timestamp: timestamp (optional) - :param wal bool: whether to write to the WAL (optional) - """ - with self.batch(timestamp=timestamp, wal=wal) as batch: - batch.put(row, data) - - def delete(self, row, columns=None, timestamp=None, wal=True): - """Delete data from the table. - - This method deletes all columns for the row specified by `row`, or only - some columns if the `columns` argument is specified. - - Note that, in many situations, :py:meth:`batch()` is a more appropriate - method to manipulate data. - - .. versionadded:: 0.7 - `wal` argument - - :param str row: the row key - :param list_or_tuple columns: list of columns (optional) - :param int timestamp: timestamp (optional) - :param wal bool: whether to write to the WAL (optional) - """ - with self.batch(timestamp=timestamp, wal=wal) as batch: - batch.delete(row, columns) - - def batch(self, timestamp=None, batch_size=None, transaction=False, - wal=True): - """Create a new batch operation for this table. - - This method returns a new :py:class:`Batch` instance that can be used - for mass data manipulation. The `timestamp` argument applies to all - puts and deletes on the batch. - - If given, the `batch_size` argument specifies the maximum batch size - after which the batch should send the mutations to the server. By - default this is unbounded. - - The `transaction` argument specifies whether the returned - :py:class:`Batch` instance should act in a transaction-like manner when - used as context manager in a ``with`` block of code. The `transaction` - flag cannot be used in combination with `batch_size`. - - The `wal` argument determines whether mutations should be - written to the HBase Write Ahead Log (WAL). This flag can only - be used with recent HBase versions. If specified, it provides - a default for all the put and delete operations on this batch. 
- This default value can be overridden for individual operations - using the `wal` argument to :py:meth:`Batch.put` and - :py:meth:`Batch.delete`. - - .. versionadded:: 0.7 - `wal` argument - - :param bool transaction: whether this batch should behave like - a transaction (only useful when used as a - context manager) - :param int batch_size: batch size (optional) - :param int timestamp: timestamp (optional) - :param wal bool: whether to write to the WAL (optional) - - :return: Batch instance - :rtype: :py:class:`Batch` - """ - kwargs = locals().copy() - del kwargs['self'] - return Batch(table=self, **kwargs) - - # - # Atomic counters - # - - def counter_get(self, row, column): - """Retrieve the current value of a counter column. - - This method retrieves the current value of a counter column. If the - counter column does not exist, this function initialises it to `0`. - - Note that application code should *never* store a incremented or - decremented counter value directly; use the atomic - :py:meth:`Table.counter_inc` and :py:meth:`Table.counter_dec` methods - for that. - - :param str row: the row key - :param str column: the column name - - :return: counter value - :rtype: int - """ - # Don't query directly, but increment with value=0 so that the counter - # is correctly initialised if didn't exist yet. - return self.counter_inc(row, column, value=0) - - def counter_set(self, row, column, value=0): - """Set a counter column to a specific value. - - This method stores a 64-bit signed integer value in the specified - column. - - Note that application code should *never* store a incremented or - decremented counter value directly; use the atomic - :py:meth:`Table.counter_inc` and :py:meth:`Table.counter_dec` methods - for that. 
- - :param str row: the row key - :param str column: the column name - :param int value: the counter value to set - """ - self.put(row, {column: pack_i64(value)}) - - def counter_inc(self, row, column, value=1): - """Atomically increment (or decrements) a counter column. - - This method atomically increments or decrements a counter column in the - row specified by `row`. The `value` argument specifies how much the - counter should be incremented (for positive values) or decremented (for - negative values). If the counter column did not exist, it is - automatically initialised to 0 before incrementing it. - - :param str row: the row key - :param str column: the column name - :param int value: the amount to increment or decrement by (optional) - - :return: counter value after incrementing - :rtype: int - """ - return self.connection.client.atomicIncrement( - self.name, row, column, value) - - def counter_dec(self, row, column, value=1): - """Atomically decrement (or increments) a counter column. - - This method is a shortcut for calling :py:meth:`Table.counter_inc` with - the value negated. - - :return: counter value after decrementing - :rtype: int - """ - return self.counter_inc(row, column, -value) diff --git a/happybase/util.py b/happybase/util.py deleted file mode 100644 index 637003a..0000000 --- a/happybase/util.py +++ /dev/null @@ -1,88 +0,0 @@ -""" -HappyBase utility module. - -These functions are not part of the public API. 
-""" - -import re - -import six -from six.moves import range - -CAPITALS = re.compile('([A-Z])') - - -try: - # Python 2.7 and up - from collections import OrderedDict -except ImportError: - try: - # External package for Python 2.6 - from ordereddict import OrderedDict - except ImportError as exc: - # Stub to throw errors at run-time (not import time) - def OrderedDict(*args, **kwargs): - raise RuntimeError( - "No OrderedDict implementation available; please " - "install the 'ordereddict' Package from PyPI.") - - -def camel_case_to_pep8(name): - """Convert a camel cased name to PEP8 style.""" - converted = CAPITALS.sub(lambda m: '_' + m.groups()[0].lower(), name) - if converted[0] == '_': - return converted[1:] - else: - return converted - - -def pep8_to_camel_case(name, initial=False): - """Convert a PEP8 style name to camel case.""" - chunks = name.split('_') - converted = [s[0].upper() + s[1:].lower() for s in chunks] - if initial: - return ''.join(converted) - else: - return chunks[0].lower() + ''.join(converted[1:]) - - -def thrift_attrs(obj_or_cls): - """Obtain Thrift data type attribute names for an instance or class.""" - return [v[1] for v in obj_or_cls.thrift_spec.values()] - - -def thrift_type_to_dict(obj): - """Convert a Thrift data type to a regular dictionary.""" - return dict((camel_case_to_pep8(attr), getattr(obj, attr)) - for attr in thrift_attrs(obj)) - - -def ensure_bytes(str_or_bytes, binary_type=six.binary_type, - text_type=six.text_type): - """Convert text into bytes, and leaves bytes as-is.""" - if isinstance(str_or_bytes, binary_type): - return str_or_bytes - if isinstance(str_or_bytes, text_type): - return str_or_bytes.encode('utf-8') - raise TypeError( - "input must be a text or byte string, got {}" - .format(type(str_or_bytes).__name__)) - - -def bytes_increment(b): - """Increment and truncate a byte string (for sorting purposes) - - This functions returns the shortest string that sorts after the given - string when compared using regular 
string comparison semantics. - - This function increments the last byte that is smaller than ``0xFF``, and - drops everything after it. If the string only contains ``0xFF`` bytes, - `None` is returned. - """ - assert isinstance(b, six.binary_type) - b = bytearray(b) # Used subset of its API is the same on Python 2 and 3. - for i in range(len(b) - 1, -1, -1): - if b[i] != 0xff: - b[i] += 1 - return bytes(b[:i+1]) - return None diff --git a/pytest.ini b/pytest.ini deleted file mode 100644 index e363f16..0000000 --- a/pytest.ini +++ /dev/null @@ -1,2 +0,0 @@ -[pytest] -addopts = --cov=happybase --cov-report html diff --git a/python-hbase-logos.png b/python-hbase-logos.png deleted file mode 100644 index df6f13f..0000000 Binary files a/python-hbase-logos.png and /dev/null differ diff --git a/python-hbase-logos.xcf b/python-hbase-logos.xcf deleted file mode 100644 index 1ce9e13..0000000 Binary files a/python-hbase-logos.xcf and /dev/null differ diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index 86c8e3e..0000000 --- a/requirements.txt +++ /dev/null @@ -1,3 +0,0 @@ -six -thriftpy2>=0.4 -importlib-resources diff --git a/setup.cfg b/setup.cfg deleted file mode 100644 index 6127751..0000000 --- a/setup.cfg +++ /dev/null @@ -1,6 +0,0 @@ -[build_sphinx] -source-dir = doc/ -build-dir = doc/build/ - -[wheel] -universal = 1 diff --git a/setup.py b/setup.py deleted file mode 100644 index 1113be3..0000000 --- a/setup.py +++ /dev/null @@ -1,45 +0,0 @@ -from os.path import join, dirname -from setuptools import find_packages, setup - -__version__ = None -exec(open('happybase/_version.py', 'r').read()) - - -def get_file_contents(filename): - with open(join(dirname(__file__), filename)) as fp: - return fp.read() - - -def get_install_requires(): - requirements = get_file_contents('requirements.txt') - install_requires = [] - for line in requirements.split('\n'): - line = line.strip() - if line and not line.startswith('-'): - install_requires.append(line) - 
return install_requires - - -setup( - name='happybase', - version=__version__, - description="A developer-friendly Python library to interact with " - "Apache HBase", - long_description=get_file_contents('README.rst'), - author="Wouter Bolsterlee", - author_email="uws@xs4all.nl", - url='https://github.com/wbolster/happybase', - install_requires=get_install_requires(), - packages=find_packages(exclude=['tests']), - include_package_data=True, - license="MIT", - classifiers=( - "Development Status :: 5 - Production/Stable", - "Intended Audience :: Developers", - "License :: OSI Approved :: MIT License", - "Programming Language :: Python :: 2", - "Programming Language :: Python :: 3", - "Topic :: Database", - "Topic :: Software Development :: Libraries :: Python Modules", - ) -) diff --git a/test-requirements.txt b/test-requirements.txt deleted file mode 100644 index 90536b9..0000000 --- a/test-requirements.txt +++ /dev/null @@ -1,4 +0,0 @@ --r requirements.txt -coverage -nose -sphinx diff --git a/tests/__init__.py b/tests/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/tests/test_api.py b/tests/test_api.py deleted file mode 100644 index da1d2fe..0000000 --- a/tests/test_api.py +++ /dev/null @@ -1,594 +0,0 @@ -""" -HappyBase tests. 
-""" - -import collections -import os -import random -import threading - -import pytest -import six -from six.moves import range - -from happybase import Connection, ConnectionPool, NoConnectionsAvailable - -HAPPYBASE_HOST = os.environ.get('HAPPYBASE_HOST') -HAPPYBASE_PORT = int(os.environ['HAPPYBASE_PORT']) if 'HAPPYBASE_PORT' in os.environ else None -HAPPYBASE_COMPAT = os.environ.get('HAPPYBASE_COMPAT', '0.98') -HAPPYBASE_TRANSPORT = os.environ.get('HAPPYBASE_TRANSPORT', 'buffered') -KEEP_TABLE = ('HAPPYBASE_NO_CLEANUP' in os.environ) - -TABLE_PREFIX = b'happybase_tests_tmp' -TEST_TABLE_NAME = b'test1' - -connection_kwargs = dict( - host=HAPPYBASE_HOST, - port=HAPPYBASE_PORT, - table_prefix=TABLE_PREFIX, - compat=HAPPYBASE_COMPAT, - transport=HAPPYBASE_TRANSPORT, -) - - -# Yuck, globals -connection = table = None - - -def maybe_delete_table(): - if KEEP_TABLE: - return - - if TEST_TABLE_NAME in connection.tables(): - print("Test table already exists; removing it...") - connection.delete_table(TEST_TABLE_NAME, disable=True) - - -def setup_module(): - global connection, table - connection = Connection(**connection_kwargs) - - assert connection is not None - - maybe_delete_table() - cfs = { - 'cf1': {}, - 'cf2': None, - 'cf3': {'max_versions': 1}, - } - connection.create_table(TEST_TABLE_NAME, families=cfs) - - table = connection.table(TEST_TABLE_NAME) - assert table is not None - - -def teardown_module(): - if not KEEP_TABLE: - connection.delete_table(TEST_TABLE_NAME, disable=True) - connection.close() - - -def test_connection_compat(): - with pytest.raises(ValueError): - Connection(compat='0.1.invalid.version') - - -def test_timeout_arg(): - Connection( - timeout=5000, - autoconnect=False) - - -def test_enabling(): - assert connection.is_table_enabled(TEST_TABLE_NAME) - connection.disable_table(TEST_TABLE_NAME) - assert not connection.is_table_enabled(TEST_TABLE_NAME) - connection.enable_table(TEST_TABLE_NAME) - assert connection.is_table_enabled(TEST_TABLE_NAME) 
- - -def test_compaction(): - connection.compact_table(TEST_TABLE_NAME) - connection.compact_table(TEST_TABLE_NAME, major=True) - - -def test_prefix(): - assert TABLE_PREFIX + b'_' == connection._table_name('') - assert TABLE_PREFIX + b'_foo' == connection._table_name('foo') - - assert connection.table('foobar').name == TABLE_PREFIX + b'_foobar' - assert connection.table('foobar', use_prefix=False).name == b'foobar' - - c = Connection(autoconnect=False) - assert b'foo' == c._table_name('foo') - - with pytest.raises(TypeError): - Connection(autoconnect=False, table_prefix=123) - - with pytest.raises(TypeError): - Connection(autoconnect=False, table_prefix_separator=2.1) - - -def test_stringify(): - str(connection) - repr(connection) - str(table) - repr(table) - - -def test_table_listing(): - names = connection.tables() - assert isinstance(names, list) - assert TEST_TABLE_NAME in names - - -def test_table_regions(): - regions = table.regions() - assert isinstance(regions, list) - - -def test_invalid_table_create(): - with pytest.raises(ValueError): - connection.create_table('sometable', families={}) - with pytest.raises(TypeError): - connection.create_table('sometable', families=0) - with pytest.raises(TypeError): - connection.create_table('sometable', families=[]) - - -def test_families(): - families = table.families() - for name, fdesc in six.iteritems(families): - assert isinstance(name, bytes) - assert isinstance(fdesc, dict) - assert 'name' in fdesc - assert isinstance(fdesc['name'], six.binary_type) - assert 'max_versions' in fdesc - - -def test_put(): - table.put(b'r1', {b'cf1:c1': b'v1', b'cf1:c2': b'v2', b'cf2:c3': b'v3'}) - table.put(b'r1', {b'cf1:c4': b'v2'}, timestamp=2345678) - table.put(b'r1', {b'cf1:c4': b'v2'}, timestamp=1369168852994) - - -def test_atomic_counters(): - row = b'row-with-counter' - column = 'cf1:counter' - - assert 0 == table.counter_get(row, column) - - assert 10 == table.counter_inc(row, column, 10) - assert 10 == 
table.counter_get(row, column) - - table.counter_set(row, column, 0) - assert 1 == table.counter_inc(row, column) - assert 4 == table.counter_inc(row, column, 3) - assert 4 == table.counter_get(row, column) - - table.counter_set(row, column, 3) - assert 3 == table.counter_get(row, column) - assert 8 == table.counter_inc(row, column, 5) - assert 6 == table.counter_inc(row, column, -2) - assert 5 == table.counter_dec(row, column) - assert 3 == table.counter_dec(row, column, 2) - assert 10 == table.counter_dec(row, column, -7) - - -def test_batch(): - with pytest.raises(TypeError): - table.batch(timestamp='invalid') - - b = table.batch() - b.put(b'row1', {b'cf1:col1': b'value1', - b'cf1:col2': b'value2'}) - b.put(b'row2', {b'cf1:col1': b'value1', - b'cf1:col2': b'value2', - b'cf1:col3': b'value3'}) - b.delete(b'row1', [b'cf1:col4']) - b.delete(b'another-row') - b.send() - - b = table.batch(timestamp=1234567) - b.put(b'row1', {b'cf1:col5': b'value5'}) - b.send() - - with pytest.raises(ValueError): - b = table.batch(batch_size=0) - - with pytest.raises(TypeError): - b = table.batch(transaction=True, batch_size=10) - - -def test_batch_context_managers(): - with table.batch() as b: - b.put(b'row4', {b'cf1:col3': b'value3'}) - b.put(b'row5', {b'cf1:col4': b'value4'}) - b.put(b'row', {b'cf1:col1': b'value1'}) - b.delete(b'row', [b'cf1:col4']) - b.put(b'row', {b'cf1:col2': b'value2'}) - - with table.batch(timestamp=87654321) as b: - b.put(b'row', {b'cf1:c3': b'somevalue', - b'cf1:c5': b'anothervalue'}) - b.delete(b'row', [b'cf1:c3']) - - with pytest.raises(ValueError): - with table.batch(transaction=True) as b: - b.put(b'fooz', {b'cf1:bar': b'baz'}) - raise ValueError - assert {} == table.row(b'fooz', [b'cf1:bar']) - - with pytest.raises(ValueError): - with table.batch(transaction=False) as b: - b.put(b'fooz', {b'cf1:bar': b'baz'}) - raise ValueError - assert {b'cf1:bar': b'baz'} == table.row(b'fooz', [b'cf1:bar']) - - with table.batch(batch_size=5) as b: - for i in 
range(10): - b.put(('row-batch1-%03d' % i).encode('ascii'), - {b'cf1:': str(i).encode('ascii')}) - - with table.batch(batch_size=20) as b: - for i in range(95): - b.put(('row-batch2-%03d' % i).encode('ascii'), - {b'cf1:': str(i).encode('ascii')}) - assert 95 == len(list(table.scan(row_prefix=b'row-batch2-'))) - - with table.batch(batch_size=20) as b: - for i in range(95): - b.delete(('row-batch2-%03d' % i).encode('ascii')) - assert 0 == len(list(table.scan(row_prefix=b'row-batch2-'))) - - -def test_row(): - row = table.row - put = table.put - row_key = b'row-test' - - with pytest.raises(TypeError): - row(row_key, 123) - - with pytest.raises(TypeError): - row(row_key, timestamp='invalid') - - put(row_key, {b'cf1:col1': b'v1old'}, timestamp=1234) - put(row_key, {b'cf1:col1': b'v1new'}, timestamp=3456) - put(row_key, {b'cf1:col2': b'v2', - b'cf2:col1': b'v3'}) - put(row_key, {b'cf2:col2': b'v4'}, timestamp=1234) - - exp = {b'cf1:col1': b'v1new', - b'cf1:col2': b'v2', - b'cf2:col1': b'v3', - b'cf2:col2': b'v4'} - assert exp == row(row_key) - - exp = {b'cf1:col1': b'v1new', - b'cf1:col2': b'v2'} - assert exp == row(row_key, [b'cf1']) - - exp = {b'cf1:col1': b'v1new', - b'cf2:col2': b'v4'} - assert exp == row(row_key, [b'cf1:col1', b'cf2:col2']) - - exp = {b'cf1:col1': b'v1old', - b'cf2:col2': b'v4'} - assert exp == row(row_key, timestamp=2345) - - assert {} == row(row_key, timestamp=123) - - res = row(row_key, include_timestamp=True) - assert len(res) == 4 - assert b'v1new' == res[b'cf1:col1'][0] - assert isinstance(res[b'cf1:col1'][1], int) - - -def test_rows(): - row_keys = [b'rows-row1', b'rows-row2', b'rows-row3'] - data_old = {b'cf1:col1': b'v1old', b'cf1:col2': b'v2old'} - data_new = {b'cf1:col1': b'v1new', b'cf1:col2': b'v2new'} - - with pytest.raises(TypeError): - table.rows(row_keys, object()) - - with pytest.raises(TypeError): - table.rows(row_keys, timestamp='invalid') - - for row_key in row_keys: - table.put(row_key, data_old, timestamp=4000) - - for row_key 
in row_keys: - table.put(row_key, data_new) - - assert {} == table.rows([]) - - rows = dict(table.rows(row_keys)) - for row_key in row_keys: - assert row_key in rows - assert data_new == rows[row_key] - - rows = dict(table.rows(row_keys, timestamp=5000)) - for row_key in row_keys: - assert row_key in rows - assert data_old == rows[row_key] - - -def test_cells(): - row_key = b'cell-test' - col = b'cf1:col1' - - table.put(row_key, {col: b'old'}, timestamp=1234) - table.put(row_key, {col: b'new'}) - - with pytest.raises(TypeError): - table.cells(row_key, col, versions='invalid') - - with pytest.raises(TypeError): - table.cells(row_key, col, versions=3, timestamp='invalid') - - with pytest.raises(ValueError): - table.cells(row_key, col, versions=0) - - results = table.cells(row_key, col, versions=1) - assert len(results) == 1 - assert b'new' == results[0] - - results = table.cells(row_key, col) - assert len(results) == 2 - assert b'new' == results[0] - assert b'old' == results[1] - - results = table.cells(row_key, col, timestamp=2345, include_timestamp=True) - assert len(results) == 1 - assert b'old' == results[0][0] - assert 1234 == results[0][1] - - -def test_scan(): - with pytest.raises(TypeError): - list(table.scan(row_prefix='foobar', row_start='xyz')) - - if connection.compat == '0.90': - with pytest.raises(NotImplementedError): - list(table.scan(filter='foo')) - - with pytest.raises(ValueError): - list(table.scan(limit=0)) - - with table.batch() as b: - for i in range(2000): - b.put(('row-scan-a%05d' % i).encode('ascii'), - {b'cf1:col1': b'v1', - b'cf1:col2': b'v2', - b'cf2:col1': b'v1', - b'cf2:col2': b'v2'}) - b.put(('row-scan-b%05d' % i).encode('ascii'), - {b'cf1:col1': b'v1', - b'cf1:col2': b'v2'}) - - def calc_len(scanner): - d = collections.deque(maxlen=1) - d.extend(enumerate(scanner, 1)) - if d: - return d[0][0] - return 0 - - scanner = table.scan(row_start=b'row-scan-a00012', - row_stop=b'row-scan-a00022') - assert 10 == calc_len(scanner) - - scanner = 
table.scan(row_start=b'xyz') - assert 0 == calc_len(scanner) - - scanner = table.scan(row_start=b'xyz', row_stop=b'zyx') - assert 0 == calc_len(scanner) - - scanner = table.scan(row_start=b'row-scan-', row_stop=b'row-scan-a999', - columns=[b'cf1:col1', b'cf2:col2']) - row_key, row = next(scanner) - assert row_key == b'row-scan-a00000' - assert row == {b'cf1:col1': b'v1', - b'cf2:col2': b'v2'} - assert 2000 - 1 == calc_len(scanner) - - scanner = table.scan(row_prefix=b'row-scan-a', batch_size=499, limit=1000) - assert 1000 == calc_len(scanner) - - scanner = table.scan(row_prefix=b'row-scan-b', batch_size=1, limit=10) - assert 10 == calc_len(scanner) - - scanner = table.scan(row_prefix=b'row-scan-b', batch_size=5, limit=10) - assert 10 == calc_len(scanner) - - scanner = table.scan(timestamp=123) - assert 0 == calc_len(scanner) - - scanner = table.scan(row_prefix=b'row', timestamp=123) - assert 0 == calc_len(scanner) - - scanner = table.scan(batch_size=20) - next(scanner) - next(scanner) - scanner.close() - with pytest.raises(StopIteration): - next(scanner) - - -def test_scan_sorting(): - if connection.compat < '0.96': - return # not supported - - input_row = {} - for i in range(100): - input_row[('cf1:col-%03d' % i).encode('ascii')] = b'' - input_key = b'row-scan-sorted' - table.put(input_key, input_row) - - scan = table.scan(row_start=input_key, sorted_columns=True) - key, row = next(scan) - assert key == input_key - assert ( - sorted(input_row.items()) == - list(row.items())) - - -def test_scan_reverse(): - - if connection.compat < '0.98': - with pytest.raises(NotImplementedError): - list(table.scan(reverse=True)) - return - - with table.batch() as b: - for i in range(2000): - b.put(('row-scan-reverse-%04d' % i).encode('ascii'), - {b'cf1:col1': b'v1', - b'cf1:col2': b'v2'}) - - scan = table.scan(row_prefix=b'row-scan-reverse', reverse=True) - assert 2000 == len(list(scan)) - - scan = table.scan(limit=10, reverse=True) - assert 10 == len(list(scan)) - - scan = 
table.scan(row_start=b'row-scan-reverse-1999', - row_stop=b'row-scan-reverse-0000', reverse=True) - key, data = next(scan) - assert b'row-scan-reverse-1999' == key - - key, data = list(scan)[-1] - assert b'row-scan-reverse-0001' == key - - -def test_scan_filter_and_batch_size(): - # See issue #54 and #56 - filter = b"SingleColumnValueFilter ('cf1', 'qual1', =, 'binary:val1')" - for k, v in table.scan(filter=filter): - print(v) - - -def test_delete(): - row_key = b'row-test-delete' - data = {b'cf1:col1': b'v1', - b'cf1:col2': b'v2', - b'cf1:col3': b'v3'} - table.put(row_key, {b'cf1:col2': b'v2old'}, timestamp=1234) - table.put(row_key, data) - - table.delete(row_key, [b'cf1:col2'], timestamp=2345) - assert 1 == len(table.cells(row_key, b'cf1:col2', versions=2)) - assert data == table.row(row_key) - - table.delete(row_key, [b'cf1:col1']) - res = table.row(row_key) - assert b'cf1:col1' not in res - assert b'cf1:col2' in res - assert b'cf1:col3' in res - - table.delete(row_key, timestamp=12345) - res = table.row(row_key) - assert b'cf1:col2' in res - assert b'cf1:col3' in res - - table.delete(row_key) - assert {} == table.row(row_key) - - -def test_connection_pool_construction(): - with pytest.raises(TypeError): - ConnectionPool(size='abc') - - with pytest.raises(ValueError): - ConnectionPool(size=0) - - -def test_connection_pool(): - - from thriftpy2.thrift import TException - - def run(): - name = threading.current_thread().name - print("Thread %s starting" % name) - - def inner_function(): - # Nested connection requests must return the same connection - with pool.connection() as another_connection: - assert connection is another_connection - - # Fake an exception once in a while - if random.random() < .25: - print("Introducing random failure") - connection.transport.close() - raise TException("Fake transport exception") - - for i in range(50): - with pool.connection() as connection: - connection.tables() - - try: - inner_function() - except TException: - # This 
error should have been picked up by the - # connection pool, and the connection should have - # been replaced by a fresh one - pass - - connection.tables() - - print("Thread %s done" % name) - - N_THREADS = 10 - - pool = ConnectionPool(size=3, **connection_kwargs) - threads = [threading.Thread(target=run) for i in range(N_THREADS)] - - for t in threads: - t.start() - - while threads: - for t in threads: - t.join(timeout=.1) - - # filter out finished threads - threads = [t for t in threads if t.is_alive()] - print("%d threads still alive" % len(threads)) - - -def test_pool_exhaustion(): - pool = ConnectionPool(size=1, **connection_kwargs) - - def run(): - with pytest.raises(NoConnectionsAvailable): - with pool.connection(timeout=.1) as connection: - connection.tables() - - with pool.connection(): - # At this point the only connection is assigned to this thread, - # so another thread cannot obtain a connection at this point. - - t = threading.Thread(target=run) - t.start() - t.join() - - -if __name__ == '__main__': - import logging - import sys - - # Dump stacktraces using 'kill -USR1', useful for debugging hanging - # programs and multi threading issues. - try: - import faulthandler - except ImportError: - pass - else: - import signal - faulthandler.register(signal.SIGUSR1) - - logging.basicConfig(level=logging.DEBUG) - - method_name = 'test_%s' % sys.argv[1] - method = globals()[method_name] - method() diff --git a/tests/test_util.py b/tests/test_util.py deleted file mode 100644 index 7ca7c56..0000000 --- a/tests/test_util.py +++ /dev/null @@ -1,54 +0,0 @@ -""" -HappyBase utility tests. 
-""" - -from codecs import decode, encode - -import happybase.util as util - - -def test_camel_case_to_pep8(): - def check(lower_cc, upper_cc, correct): - - x1 = util.camel_case_to_pep8(lower_cc) - x2 = util.camel_case_to_pep8(upper_cc) - assert correct == x1 - assert correct == x2 - - y1 = util.pep8_to_camel_case(x1, True) - y2 = util.pep8_to_camel_case(x2, False) - assert upper_cc == y1 - assert lower_cc == y2 - - examples = [('foo', 'Foo', 'foo'), - ('fooBar', 'FooBar', 'foo_bar'), - ('fooBarBaz', 'FooBarBaz', 'foo_bar_baz'), - ('fOO', 'FOO', 'f_o_o')] - - for a, b, c in examples: - check(a, b, c) - - -def test_bytes_increment(): - def check(s_hex, expected): - s = decode(s_hex, 'hex') - v = util.bytes_increment(s) - v_hex = encode(v, 'hex') - assert expected == v_hex - assert s < v - - test_values = [ - (b'00', b'01'), - (b'01', b'02'), - (b'fe', b'ff'), - (b'1234', b'1235'), - (b'12fe', b'12ff'), - (b'12ff', b'13'), - (b'424242ff', b'424243'), - (b'4242ffff', b'4243'), - ] - - assert util.bytes_increment(b'\xff\xff\xff') is None - - for s, expected in test_values: - check(s, expected) diff --git a/tox.ini b/tox.ini deleted file mode 100644 index 8df7c6b..0000000 --- a/tox.ini +++ /dev/null @@ -1,9 +0,0 @@ -[tox] -envlist = py27,py34,py35 - -[testenv] -deps= - pytest - pytest-cov - coverage -commands=pytest