HBase Source Code in Practice: CreateRandomStoreFile

Date: 2022-09-14 09:56:40
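
This post looks at CreateRandomStoreFile, a command-line utility from the HBase test sources (package org.apache.hadoop.hbase.regionserver) that writes a single store file (HFile) filled with random key/value pairs. It is handy when you want a realistic HFile to experiment with — for example to try out compression codecs, Bloom filter types, or block sizes — without standing up a cluster. The full listing follows; a usage sketch comes after it.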
/*
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.regionserver;

import java.io.IOException;
import java.util.Arrays;
import java.util.Random;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.io.compress.Compression;
import org.apache.hadoop.hbase.io.hfile.CacheConfig;
import org.apache.hadoop.hbase.io.hfile.HFileBlockIndex;
import org.apache.hadoop.hbase.io.hfile.HFileContext;
import org.apache.hadoop.hbase.io.hfile.HFileContextBuilder;
import org.apache.hadoop.hbase.util.BloomFilterFactory;
import org.apache.hadoop.io.BytesWritable;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hbase.thirdparty.org.apache.commons.cli.CommandLine;
import org.apache.hbase.thirdparty.org.apache.commons.cli.CommandLineParser;
import org.apache.hbase.thirdparty.org.apache.commons.cli.HelpFormatter;
import org.apache.hbase.thirdparty.org.apache.commons.cli.Options;
import org.apache.hbase.thirdparty.org.apache.commons.cli.ParseException;
import org.apache.hbase.thirdparty.org.apache.commons.cli.PosixParser;

/**
* Creates an HFile with random key/value pairs.
*/
public class CreateRandomStoreFile {

/**
* Key/value lengths may vary by up to this many bytes in either direction.
*/
private static final int LEN_VARIATION = 5;

private static final Logger LOG =
LoggerFactory.getLogger(CreateRandomStoreFile.class);
private static final String OUTPUT_DIR_OPTION = "o";
private static final String NUM_KV_OPTION = "n";
private static final String HFILE_VERSION_OPTION = "h";
private static final String KEY_SIZE_OPTION = "k";
private static final String VALUE_SIZE_OPTION = "v";
private static final String COMPRESSION_OPTION = "c";
private static final String BLOOM_FILTER_OPTION = "bf";
private static final String BLOCK_SIZE_OPTION = "bs";
private static final String BLOOM_BLOCK_SIZE_OPTION = "bfbs";
private static final String INDEX_BLOCK_SIZE_OPTION = "ibs";

/** The exit code this command-line tool returns on failure */
private static final int EXIT_FAILURE = 1;

/** The number of valid key types in a store file (everything except Minimum and Maximum) */
private static final int NUM_VALID_KEY_TYPES =
KeyValue.Type.values().length - 2;

private Options options = new Options();

private int keyPrefixLen, keyLen, rowLen, cfLen, valueLen;

private Random rand;

/**
* Runs the tool.
*
* @param args command-line arguments
* @return true in case of success
* @throws IOException
*/
public boolean run(String[] args) throws IOException {
options.addOption(OUTPUT_DIR_OPTION, "output_dir", true,
"Output directory");
options.addOption(NUM_KV_OPTION, "num_kv", true,
"Number of key/value pairs");
options.addOption(KEY_SIZE_OPTION, "key_size", true, "Average key size");
options.addOption(VALUE_SIZE_OPTION, "value_size", true,
"Average value size");
options.addOption(HFILE_VERSION_OPTION, "hfile_version", true,
"HFile version to create");
options.addOption(COMPRESSION_OPTION, "compression", true,
" Compression type, one of "
+ Arrays.toString(Compression.Algorithm.values()));
options.addOption(BLOOM_FILTER_OPTION, "bloom_filter", true,
"Bloom filter type, one of "
+ Arrays.toString(BloomType.values()));
options.addOption(BLOCK_SIZE_OPTION, "block_size", true,
"HFile block size");
options.addOption(BLOOM_BLOCK_SIZE_OPTION, "bloom_block_size", true,
"Compound Bloom filters block size");
options.addOption(INDEX_BLOCK_SIZE_OPTION, "index_block_size", true,
"Index block size"); if (args.length == 0) {
HelpFormatter formatter = new HelpFormatter();
formatter.printHelp(CreateRandomStoreFile.class.getSimpleName(), options,
true);
return false;
}

CommandLineParser parser = new PosixParser();
CommandLine cmdLine;
try {
cmdLine = parser.parse(options, args);
} catch (ParseException ex) {
LOG.error(ex.toString(), ex);
return false;
}

if (!cmdLine.hasOption(OUTPUT_DIR_OPTION)) {
LOG.error("Output directory is not specified");
return false;
}

if (!cmdLine.hasOption(NUM_KV_OPTION)) {
LOG.error("The number of key/value pairs is not specified");
return false;
}

if (!cmdLine.hasOption(KEY_SIZE_OPTION)) {
LOG.error("Key size is not specified");
return false;
}

if (!cmdLine.hasOption(VALUE_SIZE_OPTION)) {
LOG.error("Value size is not specified");
return false;
}

Configuration conf = HBaseConfiguration.create();

Path outputDir = new Path(cmdLine.getOptionValue(OUTPUT_DIR_OPTION));

long numKV = Long.parseLong(cmdLine.getOptionValue(NUM_KV_OPTION));

configureKeyValue(numKV,
Integer.parseInt(cmdLine.getOptionValue(KEY_SIZE_OPTION)),
Integer.parseInt(cmdLine.getOptionValue(VALUE_SIZE_OPTION)));

FileSystem fs = FileSystem.get(conf);

Compression.Algorithm compr = Compression.Algorithm.NONE;
if (cmdLine.hasOption(COMPRESSION_OPTION)) {
compr = Compression.Algorithm.valueOf(
cmdLine.getOptionValue(COMPRESSION_OPTION));
}

BloomType bloomType = BloomType.NONE;
if (cmdLine.hasOption(BLOOM_FILTER_OPTION)) {
bloomType = BloomType.valueOf(cmdLine.getOptionValue(
BLOOM_FILTER_OPTION));
}

int blockSize = HConstants.DEFAULT_BLOCKSIZE;
if (cmdLine.hasOption(BLOCK_SIZE_OPTION)) {
blockSize = Integer.parseInt(cmdLine.getOptionValue(BLOCK_SIZE_OPTION));
}

if (cmdLine.hasOption(BLOOM_BLOCK_SIZE_OPTION)) {
conf.setInt(BloomFilterFactory.IO_STOREFILE_BLOOM_BLOCK_SIZE,
Integer.parseInt(cmdLine.getOptionValue(BLOOM_BLOCK_SIZE_OPTION)));
}

if (cmdLine.hasOption(INDEX_BLOCK_SIZE_OPTION)) {
conf.setInt(HFileBlockIndex.MAX_CHUNK_SIZE_KEY,
Integer.parseInt(cmdLine.getOptionValue(INDEX_BLOCK_SIZE_OPTION)));
}

HFileContext meta = new HFileContextBuilder().withCompression(compr)
.withBlockSize(blockSize).build();
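// Note: withOutputDir lets the writer pick a unique, generated file name
// inside that directory; use withFilePath instead to control the exact path.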
StoreFileWriter sfw = new StoreFileWriter.Builder(conf,
new CacheConfig(conf), fs)
.withOutputDir(outputDir)
.withBloomType(bloomType)
.withMaxKeyCount(numKV)
.withFileContext(meta)
.build();

rand = new Random();
LOG.info("Writing " + numKV + " key/value pairs");
for (long i = 0; i < numKV; ++i) {
sfw.append(generateKeyValue(i));
}

// Meta blocks are named blocks written after the data blocks; both names
// and contents are random here, purely to exercise the write path.
int numMetaBlocks = rand.nextInt(10) + 1;
LOG.info("Writing " + numMetaBlocks + " meta blocks");
for (int metaI = 0; metaI < numMetaBlocks; ++metaI) {
sfw.getHFileWriter().appendMetaBlock(generateString(),
new BytesWritable(generateValue()));
}
sfw.close();

Path storeFilePath = sfw.getPath();
long fileSize = fs.getFileStatus(storeFilePath).getLen();
LOG.info("Created {}, {} bytes, compression={}", storeFilePath, fileSize, compr.toString()); return true;
} private void configureKeyValue(long numKV, int keyLen, int valueLen) {
numKV = Math.abs(numKV);
keyLen = Math.abs(keyLen);
keyPrefixLen = 0;
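// Count how many bytes are needed to represent numKV in binary: e.g.
// numKV = 100000 (0x0186A0) takes three shifts, so keyPrefixLen = 3.
// Every key starts with this fixed-width sequence number.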
while (numKV != 0) {
numKV >>>= 8;
++keyPrefixLen;
}

this.keyLen = Math.max(keyPrefixLen, keyLen);
this.valueLen = valueLen;

// Arbitrarily split the key into row, column family, and qualifier.
rowLen = keyPrefixLen / 3;
cfLen = keyPrefixLen / 4;
}

/** Returns a uniformly random int in the range [-range, range]. */
private int nextInRange(int range) {
return rand.nextInt(2 * range + 1) - range;
}

public KeyValue generateKeyValue(long i) {
byte[] k = generateKey(i);
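// generateKey(i) packs all three key components into one buffer: bytes
// [0, rowLen) are the row, [rowLen, rowLen + cfLen) the column family,
// and the rest the qualifier.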
byte[] v = generateValue();

return new KeyValue(
k, 0, rowLen,
k, rowLen, cfLen,
k, rowLen + cfLen, k.length - rowLen - cfLen,
rand.nextLong(),
generateKeyType(rand),
v, 0, v.length);
}

public static KeyValue.Type generateKeyType(Random rand) {
if (rand.nextBoolean()) {
// Let's make half of KVs puts.
return KeyValue.Type.Put;
} else {
KeyValue.Type keyType =
KeyValue.Type.values()[1 + rand.nextInt(NUM_VALID_KEY_TYPES)];
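// Index 0 is Minimum and the highest index is Maximum, so drawing from
// [1, NUM_VALID_KEY_TYPES] stays within the valid types; the check below
// only fires if the enum layout ever changes.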
if (keyType == KeyValue.Type.Minimum || keyType == KeyValue.Type.Maximum) {
throw new RuntimeException("Generated an invalid key type: " + keyType
+ ". Probably the layout of KeyValue.Type has changed.");
}
return keyType;
}
}

/** Generates a random name of up to nine uppercase letters for a meta block. */
private String generateString() {
StringBuilder sb = new StringBuilder();
for (int i = 0; i < rand.nextInt(10); ++i) {
sb.append((char) ('A' + rand.nextInt(26)));
}
return sb.toString();
}

/**
* Generates a key whose first keyPrefixLen bytes are the big-endian
* encoding of i and whose remaining bytes are random; the total length
* varies around keyLen.
*/
private byte[] generateKey(long i) {
byte[] k = new byte[Math.max(keyPrefixLen, keyLen
+ nextInRange(LEN_VARIATION))];
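// Write i into the prefix in big-endian order so that keys produced for
// increasing i sort lexicographically, as HFile appends require.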
for (int pos = keyPrefixLen - 1; pos >= 0; --pos) {
k[pos] = (byte) (i & 0xFF);
i >>>= 8;
}
for (int pos = keyPrefixLen; pos < k.length; ++pos) {
k[pos] = (byte) rand.nextInt(256);
}
return k;
}

/** Generates a random value of at least one byte, varying around valueLen. */
private byte[] generateValue() {
byte[] v = new byte[Math.max(1, valueLen + nextInRange(LEN_VARIATION))];
for (int i = 0; i < v.length; ++i) {
v[i] = (byte) rand.nextInt(256);
}
return v;
}

public static void main(String[] args) {
CreateRandomStoreFile app = new CreateRandomStoreFile();
try {
if (!app.run(args)) {
System.exit(EXIT_FAILURE);
}
} catch (IOException ex) {
LOG.error(ex.toString(), ex);
System.exit(EXIT_FAILURE);
}
}
}
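
The following is a minimal, hypothetical driver showing one way to run the tool programmatically; the demo class name, output path, and argument values are made up for illustration, and it assumes the class above plus an HBase client configuration are on the classpath. It writes 100,000 pairs with keys averaging 20 bytes and values 100 bytes, using GZ compression and a ROW Bloom filter:

import java.io.IOException;

import org.apache.hadoop.hbase.regionserver.CreateRandomStoreFile;

public class CreateRandomStoreFileDemo {
public static void main(String[] args) throws IOException {
// Hypothetical arguments; any directory writable by the process works.
String[] toolArgs = {
"-o", "/tmp/random-store",
"-n", "100000",
"-k", "20",
"-v", "100",
"-c", "GZ",
"-bf", "ROW"
};
boolean ok = new CreateRandomStoreFile().run(toolArgs);
System.exit(ok ? 0 : 1);
}
}

The generated file lands under /tmp/random-store with a writer-chosen name; the tool logs the exact path and size on completion. If you have an HBase installation at hand, the HFile pretty-printer (hbase hfile -m -f <path>) is one way to inspect the result's metadata, though the exact flags may vary by version.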
