一、引言：

　　上篇文章提到了HBase插入性能优化涉及到的五个参数，并从参数配置的角度给大家提供了一个性能测试环境的实验代码。根据网友的反馈，基于单线程模式实现的数据插入性能毕竟有限。通过个人实测，在我的虚拟机环境下，单线程插入数据的速率约为4万条/秒。集群指标是：CPU双核1.83，虚拟机512M内存，集群部署单点模式。本文给出了基于多线程并发模式的测试代码案例和实测结果，希望能给大家一些启示：

二、源程序：

  1 import org.apache.hadoop.conf.Configuration;

  2 import org.apache.hadoop.hbase.HBaseConfiguration;

  3 import java.io.BufferedReader;

  4 import java.io.File;

  5 import java.io.FileNotFoundException;

  6 import java.io.FileReader;

  7 import java.io.IOException;

  8 import java.util.ArrayList;

  9 import java.util.List;

 10 import java.util.Random;

 11

 12 import org.apache.hadoop.conf.Configuration;

 13 import org.apache.hadoop.hbase.HBaseConfiguration;

 14 import org.apache.hadoop.hbase.client.HBaseAdmin;

 15 import org.apache.hadoop.hbase.client.HTable;

 16 import org.apache.hadoop.hbase.client.HTableInterface;

 17 import org.apache.hadoop.hbase.client.HTablePool;

 18 import org.apache.hadoop.hbase.client.Put;

 19

 20 public class HBaseImportEx {

 21     static Configuration hbaseConfig = null;

 22     public static HTablePool pool = null;

 23     public static String tableName = "T_TEST_1";

 24     static{

 25          //conf = HBaseConfiguration.create();

 26          Configuration HBASE_CONFIG = new Configuration();

 27          HBASE_CONFIG.set("hbase.master", "192.168.230.133:60000");

 28          HBASE_CONFIG.set("hbase.zookeeper.quorum", "192.168.230.133");

 29          HBASE_CONFIG.set("hbase.zookeeper.property.clientPort", "2181");

 30          hbaseConfig = HBaseConfiguration.create(HBASE_CONFIG);

 31

 32          pool = new HTablePool(hbaseConfig, 1000);

 33     }

 34     /*

 35      * Insert Test single thread

 36      * */

 37     public static void SingleThreadInsert()throws IOException

 38     {

 39         System.out.println("---------开始SingleThreadInsert测试----------");

 40         long start = System.currentTimeMillis();

 41         //HTableInterface table = null;

 42         HTable table = null;

 43         table = (HTable)pool.getTable(tableName);

 44         table.setAutoFlush(false);

 45         table.setWriteBufferSize(24*1024*1024);

 46         //构造测试数据

 47         List<Put> list = new ArrayList<Put>();

 48         int count = 10000;

 49         byte[] buffer = new byte[350];

 50         Random rand = new Random();

 51         for(int i=0;i<count;i++)

 52         {

 53             Put put = new Put(String.format("row %d",i).getBytes());

 54             rand.nextBytes(buffer);

 55             put.add("f1".getBytes(), null, buffer);

 56             //wal=false

 57             put.setWriteToWAL(false);

 58             list.add(put);

 59             if(i%10000 == 0)

 60             {

 61                 table.put(list);

 62                 list.clear();

 63                 table.flushCommits();

 64             }

 65         }

 66         long stop = System.currentTimeMillis();

 67         //System.out.println("WAL="+wal+",autoFlush="+autoFlush+",buffer="+writeBuffer+",count="+count);

 68

 69         System.out.println("插入数据："+count+"共耗时："+ (stop - start)*1.0/1000+"s");

 70

 71         System.out.println("---------结束SingleThreadInsert测试----------");

 72     }

 73     /*

 74      * 多线程环境下线程插入函数

 75      *

 76      * */

 77     public static void InsertProcess()throws IOException

 78     {

 79         long start = System.currentTimeMillis();

 80         //HTableInterface table = null;

 81         HTable table = null;

 82         table = (HTable)pool.getTable(tableName);

 83         table.setAutoFlush(false);

 84         table.setWriteBufferSize(24*1024*1024);

 85         //构造测试数据

 86         List<Put> list = new ArrayList<Put>();

 87         int count = 10000;

 88         byte[] buffer = new byte[256];

 89         Random rand = new Random();

 90         for(int i=0;i<count;i++)

 91         {

 92             Put put = new Put(String.format("row %d",i).getBytes());

 93             rand.nextBytes(buffer);

 94             put.add("f1".getBytes(), null, buffer);

 95             //wal=false

 96             put.setWriteToWAL(false);

 97             list.add(put);

 98             if(i%10000 == 0)

 99             {

100                 table.put(list);

101                 list.clear();

102                 table.flushCommits();

103             }

104         }

105         long stop = System.currentTimeMillis();

106         //System.out.println("WAL="+wal+",autoFlush="+autoFlush+",buffer="+writeBuffer+",count="+count);

107

108         System.out.println("线程:"+Thread.currentThread().getId()+"插入数据："+count+"共耗时："+ (stop - start)*1.0/1000+"s");

109     }

110

111

112     /*

113      * Mutil thread insert test

114      * */

115     public static void MultThreadInsert() throws InterruptedException

116     {

117         System.out.println("---------开始MultThreadInsert测试----------");

118         long start = System.currentTimeMillis();

119         int threadNumber = 10;

120         Thread[] threads=new Thread[threadNumber];

121         for(int i=0;i<threads.length;i++)

122         {

123             threads[i]= new ImportThread();

124             threads[i].start();

125         }

126         for(int j=0;j< threads.length;j++)

127         {

128              (threads[j]).join();

129         }

130         long stop = System.currentTimeMillis();

131

132         System.out.println("MultThreadInsert："+threadNumber*10000+"共耗时："+ (stop - start)*1.0/1000+"s");

133         System.out.println("---------结束MultThreadInsert测试----------");

134     }

135

136     /**

137      * @param args

138      */

139     public static void main(String[] args)  throws Exception{

140         // TODO Auto-generated method stub

141         //SingleThreadInsert();

142         MultThreadInsert();

143

144

145     }

146

147     public static class ImportThread extends Thread{

148         public void HandleThread()

149         {

150             //this.TableName = "T_TEST_1";

151

152

153         }

154         //

155         public void run(){

156             try{

157                 InsertProcess();

158             }

159             catch(IOException e){

160                 e.printStackTrace();

161             }finally{

162                 System.gc();

163                 }

164             }

165         }

166

167 }

三、说明

1.线程数设置需要根据本集群硬件参数，通过实际测试得出。否则在线程过多的情况下，整体性能反而会下降（总耗时反而增加）。

2.单笔提交数对性能的影响非常明显，需要在自己的环境下，找到最理想的数值，这个需要与单条记录的字节数相关。

四、测试结果

---------开始MultThreadInsert测试----------

线程:8插入数据：10000共耗时：1.328s
线程:16插入数据：10000共耗时：1.562s
线程:11插入数据：10000共耗时：1.562s
线程:10插入数据：10000共耗时：1.812s
线程:13插入数据：10000共耗时：2.0s
线程:17插入数据：10000共耗时：2.14s
线程:14插入数据：10000共耗时：2.265s
线程:9插入数据：10000共耗时：2.468s
线程:15插入数据：10000共耗时：2.562s
线程:12插入数据：10000共耗时：2.671s
MultThreadInsert：100000共耗时：2.703s
---------结束MultThreadInsert测试----------

作者：张子良
出处：http://www.cnblogs.com/hadoopdev
本文版权归作者所有，欢迎转载，但未经作者同意必须保留此段声明，且在文章页面明显位置给出原文连接，否则保留追究法律责任的权利。

分类: 大数据, 云计算

笔试面试中常见的位运算用法

　　本文是准备找工作过程中关于位运算的一些积累和记录的整理。注意：部分位运算的处理结果依赖于变量所属类型的字长，使用时请结合具体环境修改。

1.XOR应用

性质：满足交换律、结合律，一个数与其自身异或结果为0。

(1)不用中间变量，交换两数

a = a^b; // a now holds (original a) XOR (original b)

b = b^a; // b = b^(a^b), thus b becomes the original a

a = a^b; // a = (a^b)^a, thus a becomes the original b

扩展：不用异或，同样也能不用中间变量，交换两数

a = a - b; // a now holds the difference (original a) - (original b)

b = a + b; // b = (a - b) + b, thus b becomes the original a

a = b - a; // a = a - (a - b), thus a becomes the original b

但是这种方式引入了一个陷阱：如果a是一个很大的正数而b是一个很大的负数，那么a-b就会溢出。虽然在执行b=a+b时可能会通过再一次溢出而得到真实的a的值，但有符号数溢出属于未定义行为，因此不推荐这种解法。

如何理解这种解法？其实第一行是a=a-b还是a=a+b再或者是a=a*b都可以，对应地在第二行把b通过这个式子和b本身的运算求出a即可，再在第三行利用ab的组合值以及原先的a求解b。明显地，使用*比+或-更容易溢出。理解后，完全不必死记硬背这三个式子，看成是解方程就不难了。

(2)寻找只出现1次的一个数，其他数出现偶数次（或寻找唯一一个出现奇数次的数，其他数出现偶数次）

解法：全部数做XOR，最后的结果就是要找的数。

扩展：寻找出现奇数次的数，其他不必寻找的数只出现偶数次。

常见的面试题扩展，思路还是原来的思路：先全部XOR一遍，在获得的结果上，按值为1的某一位（该位为1说明待找的数中存在两个数，它们的二进制表示在该位上不同）对所有数进行分组，再在各组内分别XOR，从而构造出所有待找出的数。

这么概括很抽象，看一道具体的笔试题吧，通过解题就容易理解了。

（小米2013校招笔试题）一个数组里，除了三个数是唯一出现的，其余的都出现偶数个，找出这三个数中的任一个。比如数组元素为【1, 2,4,5,6,4,2】，只有1,5,6这三个数字是唯一出现的，我们只需要输出1,5,6中的一个就行。

解答：http://blog.****.net/leo115/article/details/8036990

(3)NIM游戏的状态分析

　　请参考《编程之美》1.12 NIM(2) “拈”游戏分析。其核心是，两种完全不同的状态（安全状态和不安全状态）对应的各堆数目的XOR值恰好分别为0和非0。

**2.加法，不用+-*/做加法（《剑指Offer》面试题47）**

迭代版本（《剑指Offer》面试题47）

/*
 * Adds two ints without using + - * /.
 *
 * XOR gives the bitwise sum without carries, (AND << 1) gives the carries;
 * the two are combined repeatedly until no carry is left.
 *
 * The work is done on unsigned values because left-shifting a negative
 * signed int is undefined behaviour. Converting the result back to int for
 * values above INT_MAX is implementation-defined (wraps on two's-complement
 * targets).
 */
int Add(int num1, int num2)
{
    unsigned int sum = (unsigned int)num1;
    unsigned int carry = (unsigned int)num2;

    while (carry != 0) {
        unsigned int partial = sum ^ carry;   /* sum ignoring carries */
        carry = (sum & carry) << 1;           /* carries moved into place */
        sum = partial;
    }

    return (int)sum;
}

递归版本（CareerCup 20.1）

/*
 * Recursive addition without + - * /.
 *
 * num1 carries the partial sum and num2 the pending carries; recursion stops
 * once there is no carry left. The original body referred to undeclared
 * names a and b instead of the parameters.
 *
 * The carry is computed on unsigned values because left-shifting a negative
 * signed int is undefined behaviour.
 */
int add_no_arithm(int num1, int num2)
{
    if (num2 == 0)
        return num1;

    int sum = num1 ^ num2;
    int carry = (int)(((unsigned int)num1 & (unsigned int)num2) << 1);

    return add_no_arithm(sum, carry);
}

3.求两数的平均数，**不用-、*、/求两数的平均数**

　　似乎是出自《程序员面试宝典》，但是我在第三版第12章没找到原题。用下面的代码就能“神奇地”获得两个整型的平均值

/*
 * Returns the average of x and y, rounded down, without - * /.
 *
 * (x & y) is the part both numbers share (each common bit counts once, which
 * is half of its contribution to x + y); (x ^ y) >> 1 is half of the bits
 * that differ. Their sum is (x + y) / 2 without ever forming x + y, so it
 * cannot overflow.
 *
 * Note: right-shifting a negative signed value is implementation-defined;
 * the result is the floor average on targets with an arithmetic shift.
 */
int average(int x, int y)
{
    return (x & y) + ((x ^ y) >> 1);
}

　　解释请看：http://blog.****.net/leo115/article/details/7993110，不过也是转载，原出处疑似已失效。

4.不用*和/做除法（《算法设计手册》面试题1-28）

　　慢速版本和优化版本请参考旧作：http://www.cnblogs.com/wuyuegb2312/p/3257558.html

　　纵观第2、3、4条可以发现，如果限制不允许使用某种四则运算符以及%，就可以在位运算上打主意了。

5.二进制中1的个数

　　不要觉得很trick，这是K&R提到过的。值得注意的是，如果使用C实现，为了避免实现定义不同造成的结果不同，需要把该变量转化为无符号型。

/*
 * Counts the 1 bits in x by testing the lowest bit and shifting right until
 * nothing is left.
 *
 * x is unsigned so the right shift always fills with zeros and the loop
 * terminates. The loop condition was written "x|=0", which only worked by
 * accident (x |= 0 evaluates to x); the intended test is x != 0.
 */
int bitcount(unsigned x)
{
    int b;

    for (b = 0; x != 0; x >>= 1)
        if (x & 1u)
            b++;

    return b;
}

　　事实上K&R习题2-9提到了一种更快的算法：

/*
 * Counts the 1 bits in x.
 *
 * x & (x - 1) clears the lowest set bit of x, so the loop body runs once per
 * set bit rather than once per bit position.
 */
int bitcount(unsigned x)
{
    int ones = 0;

    while (x) {
        x &= x - 1;   /* drop the lowest set bit */
        ++ones;
    }

    return ones;
}

6.从无符号型x的第p位开始，取n位（K&R）

/*
 * Returns the n-bit field of x whose highest bit is at position p,
 * right-adjusted. Bit 0 is the least significant bit.
 *
 * Example: getbits(x, 4, 3) yields bits 4, 3 and 2 of x.
 *
 * Preconditions: n >= 0, p + 1 >= n, and n is smaller than the bit width of
 * unsigned (shifting by the full width is undefined).
 *
 * The mask is built from ~0U rather than ~0: ~0 is the signed value -1, and
 * left-shifting a negative signed int is undefined behaviour.
 */
unsigned getbits(unsigned x, int p, int n)
{
    return (x >> (p + 1 - n)) & ~(~0U << n);
}

7.利用同余的性质和位运算加速的辗转相减求最大公约数法（《C语言参考手册》第七章）

/*
 * Binary (Stein's) greatest-common-divisor algorithm: uses only shifts,
 * subtraction and parity tests.
 *
 * Returns gcd(x, y). By convention gcd(0, y) == y and gcd(x, 0) == x (so
 * gcd(0, 0) == 0); the previous code returned 0 whenever either argument
 * was 0.
 */
unsigned binary_gcd(unsigned x, unsigned y)
{
    unsigned temp;
    unsigned common_power_of_two = 0;

    if (x == 0)
        return y;
    if (y == 0)
        return x;

    /* Find the largest power of two that divides both x and y. */
    while (((x | y) & 1) == 0) {
        x >>= 1;
        y >>= 1;
        ++common_power_of_two;
    }

    /* Any remaining factor of two in x is not shared with y: discard it. */
    while ((x & 1) == 0)
        x >>= 1;

    while (y) {
        /* x is odd and y is nonzero here. */
        while ((y & 1) == 0)
            y >>= 1;

        /* x and y are both odd here. */
        temp = y;
        if (x > y)
            y = x - y;
        else
            y = y - x;
        x = temp;
        /* Now x has the old value of y, which is odd. y is even, because it
           is the difference of two odd numbers; therefore it will be
           right-shifted at least once on the next iteration. */
    }

    /* Restore the common power of two removed at the start. */
    return (x << common_power_of_two);
}

8.不用大于小于号，求两数较大值（CareerCup 19.4）

/*
 * Returns the larger of a and b without using comparison operators.
 *
 * The sign bit of (a - b) is 1 exactly when a < b; in that case subtracting
 * the difference from a yields b, otherwise a is returned unchanged.
 *
 * The difference is formed in long long so that it cannot overflow for
 * operands of opposite sign (e.g. INT_MAX and -1), and the sign bit is read
 * from the unsigned representation instead of a hard-coded ">> 31".
 * Assumes long long is wider than int and has 8-bit bytes, as on all
 * mainstream targets.
 */
int getMax(int a, int b)
{
    long long diff = (long long)a - (long long)b;
    int is_negative = (int)((unsigned long long)diff >> (sizeof(diff) * 8 - 1));

    return (int)(a - is_negative * diff);
}

9.实现位向量

　　这种做法是对空间的高效利用。对《编程珠玑》上位向量实现全面分析的旧作一篇：http://www.cnblogs.com/wuyuegb2312/p/3136831.html

10.其他

　　附上MoreWindows前辈的一篇博文链接：位操作基础篇之位操作全面总结，顺便把该文的目录拿来做个索引：

作者：五岳
出处：http://www.cnblogs.com/wuyuegb2312
对于标题未标注为“转载”的文章均为原创，其版权归作者所有，欢迎转载，但未经作者同意必须保留此段声明，且在文章页面明显位置给出原文连接，否则保留追究法律责任的权利。

分类: C, 笔试面试题

标签: 位运算

图解Javascript之Function

大数据之HBase

好东西分享给大家，但要尊重事实！！！因此特别说明：本图非我本人亲自所作，乃我大天朝网友所绘制。个人感觉此图，覆盖全面，细节考虑甚周全，因此分享给大家，同时在此特别感谢网友的无私分享！

秒客网

大数据之HBase

笔试面试中常见的位运算用法

1.XOR应用

**2.加法，不用+-*/做加法（《剑指Offer》面试题47）**

3.求两数的平均数，**不用-、*、/求两数的平均数**

4.不用*和/做除法（《算法设计手册》面试题1-28）

5.二进制中1的个数

6.从无符号型x的第p位开始，取n位（K&R）

7.利用同余的性质和位运算加速的辗转相减求最大公约数法（《C语言参考手册》第七章）

8.不用大于小于号，求两数较大值（CareerCup 19.4）

9.实现位向量

10.其他

图解Javascript之Function

相关文章

大数据之HBase

笔试面试中常见的位运算用法

1.XOR应用

2.加法，不用+-*/做加法（《剑指Offer》面试题47）

3.求两数的平均数 ，不用-、*、/求两数的平均数

4.不用*和/做除法（《算法设计手册》面试题1-28）

5.二进制中1的个数

6.从无符号型x的第p位开始，取n位（K&R）

7.利用同余的性质和位运算加速的辗转相减求最大公约数法（《C语言参考手册》第七章）

8.不用大于小于号，求两数较大值（CareerCup 19.4）

9.实现位向量

10.其他

图解Javascript之Function

相关文章

**2.加法，不用+-*/做加法（《剑指Offer》面试题47）**

3.求两数的平均数，**不用-、*、/求两数的平均数**