MySQL 去除重复记录案例

时间:2024-03-11 13:32:04

例1,表中有主键(可唯一标识的字段),且该字段为数字类型

1 测试数据

 -- Table structure. `id` is a numeric auto-increment primary key; the
 -- de-duplication statements in this article keep the row with the smallest
 -- `id` of every repeated (`name`, `add`) pair.
 DROP TABLE IF EXISTS `t1`;
 CREATE TABLE IF NOT EXISTS `t1` (
     `id`   INT         NOT NULL AUTO_INCREMENT,
     `name` VARCHAR(20) NOT NULL,
     `add`  VARCHAR(20) NOT NULL,  -- ADD is a reserved word: keep the backticks
     PRIMARY KEY (`id`)
 ) ENGINE = InnoDB;

 -- Test data: 23 rows in which several (`name`, `add`) pairs occur more than once.
 INSERT INTO `t1` (`name`, `add`) VALUES
     ('abc', '123'),
     ('abc', '123'),
     ('abc', '321'),
     ('abc', '123'),
     ('xzy', '123'),
     ('xzy', '456'),
     ('xzy', '456'),
     ('xzy', '456'),
     ('xzy', '789'),
     ('xzy', '987'),
     ('xzy', '789'),
     ('ijk', '147'),
     ('ijk', '147'),
     ('ijk', '852'),
     ('opq', '852'),
     ('opq', '963'),
     ('opq', '741'),
     ('tpk', '741'),
     ('tpk', '963'),
     ('tpk', '963'),
     ('wer', '546'),
     ('wer', '546'),
     ('once', '546');

 -- Show the inserted rows; ordered by `id` so the listing is reproducible.
 SELECT `id`, `name`, `add`
 FROM `t1`
 ORDER BY `id`;
+----+------+-----+
| id | name | add |
+----+------+-----+
|  1 | abc  | 123 |
|  2 | abc  | 123 |
|  3 | abc  | 321 |
|  4 | abc  | 123 |
|  5 | xzy  | 123 |
|  6 | xzy  | 456 |
|  7 | xzy  | 456 |
|  8 | xzy  | 456 |
|  9 | xzy  | 789 |
| 10 | xzy  | 987 |
| 11 | xzy  | 789 |
| 12 | ijk  | 147 |
| 13 | ijk  | 147 |
| 14 | ijk  | 852 |
| 15 | opq  | 852 |
| 16 | opq  | 963 |
| 17 | opq  | 741 |
| 18 | tpk  | 741 |
| 19 | tpk  | 963 |
| 20 | tpk  | 963 |
| 21 | wer  | 546 |
| 22 | wer  | 546 |
| 23 | once | 546 |
+----+------+-----+
23 rows in set (0.00 sec)

2 查找id最小的重复数据(只查找id字段)

 -- For every (`name`, `add`) pair that occurs more than once, return the
 -- smallest `id` in that group (only the `id` column is selected).
 -- GROUP BY already yields one row per group, so no DISTINCT is needed.
 -- The explicit ORDER BY makes the row order deterministic; MySQL 8.0 no
 -- longer sorts GROUP BY results implicitly.
 SELECT
     MIN(`id`) AS `id`
 FROM `t1`
 GROUP BY
     `name`,
     `add`
 HAVING COUNT(*) > 1
 ORDER BY
     `name`,
     `add`;
+------+
| id   |
+------+
|    1 |
|   12 |
|   19 |
|   21 |
|    6 |
|    9 |
+------+
6 rows in set (0.00 sec)

3 查找所有重复数据

 -- List every row that belongs to a duplicated (`name`, `add`) group.
 SELECT
     `t1`.`id`,
     `t1`.`name`,
     `t1`.`add`
 FROM `t1`
 INNER JOIN (
     -- One row per (`name`, `add`) pair that occurs more than once.
     SELECT `name`, `add`
     FROM `t1`
     GROUP BY `name`, `add`
     HAVING COUNT(*) > 1
 ) AS `dup`
     ON  `dup`.`name` = `t1`.`name`
     AND `dup`.`add`  = `t1`.`add`
 ORDER BY `t1`.`id`;
+----+------+-----+
| id | name | add |
+----+------+-----+
|  1 | abc  | 123 |
|  2 | abc  | 123 |
|  4 | abc  | 123 |
|  6 | xzy  | 456 |
|  7 | xzy  | 456 |
|  8 | xzy  | 456 |
|  9 | xzy  | 789 |
| 11 | xzy  | 789 |
| 12 | ijk  | 147 |
| 13 | ijk  | 147 |
| 19 | tpk  | 963 |
| 20 | tpk  | 963 |
| 21 | wer  | 546 |
| 22 | wer  | 546 |
+----+------+-----+
14 rows in set (0.00 sec)

4 查找除id最小的数据外的重复数据

 -- List the redundant rows: members of a duplicated (`name`, `add`) group
 -- other than the one with the smallest `id`.
 SELECT
     `t1`.`id`,
     `t1`.`name`,
     `t1`.`add`
 FROM `t1`
 INNER JOIN (
     -- Smallest `id` of each duplicated group; GROUP BY already returns one
     -- row per group, so DISTINCT is unnecessary.
     SELECT MIN(`id`) AS `id`, `name`, `add`
     FROM `t1`
     GROUP BY `name`, `add`
     HAVING COUNT(*) > 1
 ) AS `kept`
     ON  `kept`.`name` = `t1`.`name`
     AND `kept`.`add`  = `t1`.`add`
 WHERE `t1`.`id` <> `kept`.`id`
 ORDER BY `t1`.`id`;
+----+------+-----+
| id | name | add |
+----+------+-----+
|  2 | abc  | 123 |
|  4 | abc  | 123 |
|  7 | xzy  | 456 |
|  8 | xzy  | 456 |
| 11 | xzy  | 789 |
| 13 | ijk  | 147 |
| 20 | tpk  | 963 |
| 22 | wer  | 546 |
+----+------+-----+
8 rows in set (0.00 sec)

5 删除重复数据,每组只保留id最小的一条

 -- Delete the redundant rows of `t1`, keeping only the row with the smallest
 -- `id` in each duplicated (`name`, `add`) group.
 DELETE FROM `t1`
 USING `t1`
 INNER JOIN (
     -- Smallest `id` of each duplicated group: the row that is kept.
     SELECT MIN(`id`) AS `id`, `name`, `add`
     FROM `t1`
     GROUP BY `name`, `add`
     HAVING COUNT(*) > 1
 ) AS `kept`
     ON  `kept`.`name` = `t1`.`name`
     AND `kept`.`add`  = `t1`.`add`
 WHERE `t1`.`id` <> `kept`.`id`;
受影响的行: 8
时间: 0.111s

6 查看删除后的数据

 -- Inspect `t1` after the delete; ordered by `id` so the listing is
 -- reproducible.
 SELECT
     `t1`.`id`,
     `t1`.`name`,
     `t1`.`add`
 FROM `t1`
 ORDER BY `t1`.`id`;
1    abc    123
3    abc    321
5    xzy    123
6    xzy    456
9    xzy    789
10    xzy    987
12    ijk    147
14    ijk    852
15    opq    852
16    opq    963
17    opq    741
18    tpk    741
19    tpk    963
21    wer    546
23    once    546

 

例2,表中没有主键(可唯一标识的字段),或者主键并非数字类型(也可以删除重复数据,但效率上肯定比较慢)

1 测试数据

 -- Table structure: same columns as the first example, but the primary key
 -- `pk` is a string rather than a number.
 DROP TABLE IF EXISTS `noid`;
 CREATE TABLE IF NOT EXISTS `noid` (
     `pk`   VARCHAR(20) NOT NULL COMMENT '字符串主键',
     `name` VARCHAR(20) NOT NULL,
     `add`  VARCHAR(20) NOT NULL,  -- ADD is a reserved word: keep the backticks
     PRIMARY KEY (`pk`)
 ) ENGINE = InnoDB;

 -- Test data: the same (`name`, `add`) values as the first example, keyed by
 -- the single-letter strings 'a' .. 'w'.
 INSERT INTO `noid` (`pk`, `name`, `add`) VALUES
     ('a', 'abc', '123'),
     ('b', 'abc', '123'),
     ('c', 'abc', '321'),
     ('d', 'abc', '123'),
     ('e', 'xzy', '123'),
     ('f', 'xzy', '456'),
     ('g', 'xzy', '456'),
     ('h', 'xzy', '456'),
     ('i', 'xzy', '789'),
     ('j', 'xzy', '987'),
     ('k', 'xzy', '789'),
     ('l', 'ijk', '147'),
     ('m', 'ijk', '147'),
     ('n', 'ijk', '852'),
     ('o', 'opq', '852'),
     ('p', 'opq', '963'),
     ('q', 'opq', '741'),
     ('r', 'tpk', '741'),
     ('s', 'tpk', '963'),
     ('t', 'tpk', '963'),
     ('u', 'wer', '546'),
     ('v', 'wer', '546'),
     ('w', 'once', '546');

 -- Show the inserted rows; ordered by `pk` so the listing is reproducible.
 SELECT `pk`, `name`, `add`
 FROM `noid`
 ORDER BY `pk`;
+----+------+-----+
| pk | name | add |
+----+------+-----+
| a  | abc  | 123 |
| b  | abc  | 123 |
| c  | abc  | 321 |
| d  | abc  | 123 |
| e  | xzy  | 123 |
| f  | xzy  | 456 |
| g  | xzy  | 456 |
| h  | xzy  | 456 |
| i  | xzy  | 789 |
| j  | xzy  | 987 |
| k  | xzy  | 789 |
| l  | ijk  | 147 |
| m  | ijk  | 147 |
| n  | ijk  | 852 |
| o  | opq  | 852 |
| p  | opq  | 963 |
| q  | opq  | 741 |
| r  | tpk  | 741 |
| s  | tpk  | 963 |
| t  | tpk  | 963 |
| u  | wer  | 546 |
| v  | wer  | 546 |
| w  | once | 546 |
+----+------+-----+
23 rows in set (0.00 sec)

 

2 为表添加自增长的id字段

 -- Add an auto-increment `id` column so every row gets a numeric identifier.
 -- MySQL only allows AUTO_INCREMENT on an indexed column, hence the index
 -- created in the same statement.
 ALTER TABLE `noid`
     ADD COLUMN `id` INT NOT NULL AUTO_INCREMENT,
     ADD INDEX `id` (`id`);
Query OK, 23 rows affected (0.16 sec)
Records: 23  Duplicates: 0  Warnings: 0

 -- Show the table with the new `id` column; ordered by `id` so the listing
 -- is reproducible.
 SELECT `pk`, `name`, `add`, `id`
 FROM `noid`
 ORDER BY `id`;
+----+------+-----+----+
| pk | name | add | id |
+----+------+-----+----+
| a  | abc  | 123 |  1 |
| b  | abc  | 123 |  2 |
| c  | abc  | 321 |  3 |
| d  | abc  | 123 |  4 |
| e  | xzy  | 123 |  5 |
| f  | xzy  | 456 |  6 |
| g  | xzy  | 456 |  7 |
| h  | xzy  | 456 |  8 |
| i  | xzy  | 789 |  9 |
| j  | xzy  | 987 | 10 |
| k  | xzy  | 789 | 11 |
| l  | ijk  | 147 | 12 |
| m  | ijk  | 147 | 13 |
| n  | ijk  | 852 | 14 |
| o  | opq  | 852 | 15 |
| p  | opq  | 963 | 16 |
| q  | opq  | 741 | 17 |
| r  | tpk  | 741 | 18 |
| s  | tpk  | 963 | 19 |
| t  | tpk  | 963 | 20 |
| u  | wer  | 546 | 21 |
| v  | wer  | 546 | 22 |
| w  | once | 546 | 23 |
+----+------+-----+----+
23 rows in set (0.00 sec)

注意:MySQL 要求 AUTO_INCREMENT 字段必须建有索引(必须是某个键的一部分),所以上面在添加id字段的同时为它创建了索引。

 

3 删除重复数据的方法与上例相同;删除完成后,记得把临时添加的id字段也删除

   删除重复数据,每组只保留id最小的一条

 

 -- Delete the redundant rows of `noid`, keeping only the row with the
 -- smallest `id` in each duplicated (`name`, `add`) group.
 DELETE FROM `noid`
 USING `noid`
 INNER JOIN (
     -- Smallest `id` of each duplicated group: the row that is kept.
     SELECT MIN(`id`) AS `id`, `name`, `add`
     FROM `noid`
     GROUP BY `name`, `add`
     HAVING COUNT(*) > 1
 ) AS `kept`
     ON  `kept`.`name` = `noid`.`name`
     AND `kept`.`add`  = `noid`.`add`
 WHERE `noid`.`id` <> `kept`.`id`;
Query OK, 8 rows affected (0.05 sec)

 

 -- Drop the `id` column that was added only to drive the de-duplication.
 ALTER TABLE `noid`
     DROP COLUMN `id`;
Query OK, 15 rows affected (0.16 sec)
Records: 15  Duplicates: 0  Warnings: 0

 -- Show the de-duplicated table; ordered by `pk` so the listing is
 -- reproducible.
 SELECT `pk`, `name`, `add`
 FROM `noid`
 ORDER BY `pk`;
+----+------+-----+
| pk | name | add |
+----+------+-----+
| a  | abc  | 123 |
| c  | abc  | 321 |
| e  | xzy  | 123 |
| f  | xzy  | 456 |
| i  | xzy  | 789 |
| j  | xzy  | 987 |
| l  | ijk  | 147 |
| n  | ijk  | 852 |
| o  | opq  | 852 |
| p  | opq  | 963 |
| q  | opq  | 741 |
| r  | tpk  | 741 |
| s  | tpk  | 963 |
| u  | wer  | 546 |
| w  | once | 546 |
+----+------+-----+
15 rows in set (0.00 sec)