Consolidate duplicates

from the Artful MySQL Tips List


It's a common enough problem. Your predecessor's web shop design allowed multiple userIDs for the same email address, and you need to consolidate the data without losing anything, and prevent such dupes in the future.

drop table if exists users, new_users, orders, comments;
create table users(u_id int primary key, name char(32), email char(32));
create table orders(o_id int primary key , u_id int, item char(8));
create table comments(c_id int primary key, o_id int, u_id int, comment char(32));
insert into users values(1,'abe','a@b.com'),(2,'bob','a@b.com'),(3,'carl','b@c.com');
insert into orders values (1,1,'item1'),(2,2,'item2'),(3,3,'item3');
insert into comments values(1,1,1,'amazing'),(2,2,2,'barf!'),(3,3,3,'calamitous');

It's a four-step:

1. Create a new users table with one (the smallest found) u_id per unique email. We use the simplest algorithm from "Within-group aggregates" at http://www.artfulsoftware.com/queries.php.
2. Update orders with those new u_ids;
3. Ditto for comments
4. Save off the old users table, rename the new one

-- 1. Create a new users table that prevents email dupes:
create table new_users
select a.email,a.u_id,a.name
from users a
left join users b on a.email=b.email and a.u_id>b.u_id
where b.u_id is null;
alter table new_users add unique key(email);
select * from new_users;

-- 2. Update orders with new user ids:
update orders o
join users u using(u_id)
join new_users n using(email)
set o.u_id=n.u_id;
select * from orders;

-- 3. Ditto for comments:
update comments c
join users u using(u_id)
join new_users n using(email)
set c.u_id=n.u_id;
select * from comments;

-- 4. Rename users table
rename table users to old_users;
rename table new_users to users;


Return to the Artful MySQL Tips page