PHP8.1.21版本已发布
vue8.1.21版本已发布
jquery8.1.21版本已发布

hbase RowFilter

原创
2016-06-07 16:29:36 859浏览

RowFilter用于过滤row key Operator Description LESS 小于 LESS_OR_EQUAL 小于等于 EQUAL 等于 NOT_EQUAL 不等于 GREATER_OR_EQUAL 大于等于 GREATER 大于 NO_OP 排除所有 Comparator Description BinaryComparator 使用Bytes.compareTo()比较 BinaryPrefix

RowFilter用于过滤row key

Operator Description
LESS 小于
LESS_OR_EQUAL 小于等于
EQUAL 等于
NOT_EQUAL 不等于
GREATER_OR_EQUAL 大于等于
GREATER 大于
NO_OP 排除所有
Comparator Description
BinaryComparator 使用Bytes.compareTo()比较
BinaryPrefixComparator 和BinaryComparator差不多,从前面开始比较
NullComparator Does?not compare against an actual value but whether a given one is?null, or not?null.
BitComparator Performs?a bitwise comparison, providing a?BitwiseOp?class with?AND,?OR, and?XOR?operators.
RegexStringComparator 正则表达式
SubstringComparator 把数据当成字符串,用contains()来判断
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.filter.BinaryComparator;
import org.apache.hadoop.hbase.filter.BinaryPrefixComparator;
import org.apache.hadoop.hbase.filter.CompareFilter;
import org.apache.hadoop.hbase.filter.Filter;
import org.apache.hadoop.hbase.filter.RegexStringComparator;
import org.apache.hadoop.hbase.filter.RowFilter;
import org.apache.hadoop.hbase.filter.SubstringComparator;
public class TestHbaseRowFilter {
	String tableName = "test_row_filter";
	Configuration config = HBaseConfiguration.create();
	/**
	 * 部分代码来自hbase权威指南
	 * @throws IOException
	 */
	public void testRowFilter() throws IOException {
		HTable table = new HTable(config, tableName);
		Scan scan = new Scan();
		System.out.println("小于等于row010的行");
		Filter filter1 = new RowFilter(CompareFilter.CompareOp.LESS_OR_EQUAL,
				new BinaryComparator("row010".getBytes()));
		scan.setFilter(filter1);
		ResultScanner scanner1 = table.getScanner(scan);
		for (Result res : scanner1) {
			System.out.println(res);
		}
		scanner1.close();
		System.out.println("正则获取结尾为5的行");
		Filter filter2 = new RowFilter(CompareFilter.CompareOp.EQUAL,
				new RegexStringComparator(".*5$"));
		scan.setFilter(filter2);
		ResultScanner scanner2 = table.getScanner(scan);
		for (Result res : scanner2) {
			System.out.println(res);
		}
		scanner2.close();
		System.out.println("包行有5的行");
		Filter filter3 = new RowFilter(CompareFilter.CompareOp.EQUAL,
				new SubstringComparator("5"));
		scan.setFilter(filter3);
		ResultScanner scanner3 = table.getScanner(scan);
		for (Result res : scanner3) {
			System.out.println(res);
		}
		scanner3.close();
		System.out.println("开头是row01的");
		Filter filter4 = new RowFilter(CompareFilter.CompareOp.EQUAL,
				new BinaryPrefixComparator("row01".getBytes()));
		scan.setFilter(filter4);
		ResultScanner scanner4 = table.getScanner(scan);
		for (Result res : scanner4) {
			System.out.println(res);
		}
		scanner3.close();
	}
	/**
	 * 初始化数据
	 */
	public void init() {
		// 创建表和初始化数据
		try {
			HBaseAdmin admin = new HBaseAdmin(config);
			if (!admin.tableExists(tableName)) {
				HTableDescriptor htd = new HTableDescriptor(tableName);
				HColumnDescriptor hcd1 = new HColumnDescriptor("data");
				htd.addFamily(hcd1);
				HColumnDescriptor hcd2 = new HColumnDescriptor("url");
				htd.addFamily(hcd2);
				admin.createTable(htd);
			}
			HTable table = new HTable(config, tableName);
			table.setAutoFlush(false);
			int count = 50;
			for (int i = 1; i 

输出结果

小于等于row010的行
keyvalues={row001/data:col1/1364133382268/Put/vlen=7, row001/url:col1/1364133382268/Put/vlen=6}
keyvalues={row002/data:col2/1364133382268/Put/vlen=7, row002/url:col2/1364133382268/Put/vlen=6}
keyvalues={row003/data:col3/1364133382268/Put/vlen=7, row003/url:col3/1364133382268/Put/vlen=6}
keyvalues={row004/data:col4/1364133382268/Put/vlen=7, row004/url:col4/1364133382268/Put/vlen=6}
keyvalues={row005/data:col5/1364133382268/Put/vlen=7, row005/url:col5/1364133382268/Put/vlen=6}
keyvalues={row006/data:col6/1364133382268/Put/vlen=7, row006/url:col6/1364133382268/Put/vlen=6}
keyvalues={row007/data:col7/1364133382268/Put/vlen=7, row007/url:col7/1364133382268/Put/vlen=6}
keyvalues={row008/data:col8/1364133382268/Put/vlen=7, row008/url:col8/1364133382268/Put/vlen=6}
keyvalues={row009/data:col9/1364133382268/Put/vlen=7, row009/url:col9/1364133382268/Put/vlen=6}
keyvalues={row010/data:col0/1364133382268/Put/vlen=7, row010/url:col0/1364133382268/Put/vlen=6}
正则获取结尾为5的行
keyvalues={row005/data:col5/1364133382268/Put/vlen=7, row005/url:col5/1364133382268/Put/vlen=6}
keyvalues={row015/data:col5/1364133382268/Put/vlen=7, row015/url:col5/1364133382268/Put/vlen=6}
keyvalues={row025/data:col5/1364133382268/Put/vlen=7, row025/url:col5/1364133382268/Put/vlen=6}
keyvalues={row035/data:col5/1364133382268/Put/vlen=7, row035/url:col5/1364133382268/Put/vlen=6}
keyvalues={row045/data:col5/1364133382268/Put/vlen=7, row045/url:col5/1364133382268/Put/vlen=6}
包行有5的行
keyvalues={row005/data:col5/1364133382268/Put/vlen=7, row005/url:col5/1364133382268/Put/vlen=6}
keyvalues={row015/data:col5/1364133382268/Put/vlen=7, row015/url:col5/1364133382268/Put/vlen=6}
keyvalues={row025/data:col5/1364133382268/Put/vlen=7, row025/url:col5/1364133382268/Put/vlen=6}
keyvalues={row035/data:col5/1364133382268/Put/vlen=7, row035/url:col5/1364133382268/Put/vlen=6}
keyvalues={row045/data:col5/1364133382268/Put/vlen=7, row045/url:col5/1364133382268/Put/vlen=6}
keyvalues={row050/data:col0/1364133382268/Put/vlen=7, row050/url:col0/1364133382268/Put/vlen=6}
开头是row01的
keyvalues={row010/data:col0/1364133382268/Put/vlen=7, row010/url:col0/1364133382268/Put/vlen=6}
keyvalues={row011/data:col1/1364133382268/Put/vlen=7, row011/url:col1/1364133382268/Put/vlen=6}
keyvalues={row012/data:col2/1364133382268/Put/vlen=7, row012/url:col2/1364133382268/Put/vlen=6}
keyvalues={row013/data:col3/1364133382268/Put/vlen=7, row013/url:col3/1364133382268/Put/vlen=6}
keyvalues={row014/data:col4/1364133382268/Put/vlen=7, row014/url:col4/1364133382268/Put/vlen=6}
keyvalues={row015/data:col5/1364133382268/Put/vlen=7, row015/url:col5/1364133382268/Put/vlen=6}
keyvalues={row016/data:col6/1364133382268/Put/vlen=7, row016/url:col6/1364133382268/Put/vlen=6}
keyvalues={row017/data:col7/1364133382268/Put/vlen=7, row017/url:col7/1364133382268/Put/vlen=6}
keyvalues={row018/data:col8/1364133382268/Put/vlen=7, row018/url:col8/1364133382268/Put/vlen=6}
keyvalues={row019/data:col9/1364133382268/Put/vlen=7, row019/url:col9/1364133382268/Put/vlen=6}

参考

hbase权威指南

声明:本文内容由网友自发贡献,版权归原作者所有,本站不承担相应法律责任。如您发现有涉嫌抄袭侵权的内容,请联系admin@php.cn核实处理。
上一条:Hadoop Rumen介绍 下一条:PyMongo笔记