Generate rows of random data

from the Artful MySQL Tips List


On GitHub there's a random data generator implemented as a suite of stored routines.

If you prefer to roll your own, here are some simple utility funcs to help you get started.

-- RANDOM STRING OF GIVEN LENGTH
drop function if exists randstr;
delimiter go
create function randstr( len int ) returns text
begin
  declare str text default '';
  repeat 
    set str = concat( str, md5(curtime() ) );
  until length(str) >= len end repeat;
  return left(str,len);
end;
go
delimiter ;

-- RANDOM BIGINT
drop function if exists randbigint;
create function randbigint( minval bigint, maxval bigint ) returns bigint
return Floor( Rand() * (maxval-minval+1) )+10;

-- RANDOM INT
drop function if exists randint;
create function randint( minval int, maxval int ) returns int
return Floor( Rand() * (maxval-minval+1) )+10;

-- RANDOM FLOAT
drop function if exists randfloat;
create function randfloat( minval float, maxval float ) returns float
return Rand() * (maxval-minval+1) + 10;
select randfloat(100,10000),randfloat(100,1000000);

-- RANDOM DATE
drop function if exists randdate;
create function randdate() returns date
return Curdate() - Interval Floor(Rand() * 4000) Day;

-- RANDOM DATETIME/TIMESTAMP
drop function if exists randdatetime; 
create function randdatetime() returns datetime 
return concat( Curdate() - Interval Floor(Rand() * 4000) Day, ' ',
               round( rand() * 24 ), ':', 
               round( rand() * 60 ), ':', 
               round( rand() * 60 ) 
             );
        
-- RANDOM DATETIME
drop function if exists randdatetime;
create function randdatetime() returns datetime
return Now() - Interval Floor(Rand() * 4000) Day;

Using a procedure

Given the table ...
create table test (
  id int not null,
  process_id int not null,
  dt datetime not null,
  high double not null,
  low double not null,
  close double not null,
  updated datetime not null default current_timestamp on update current_timestamp,
  primary key (id, process_id)
) engine=innodb default charset=utf8mb4;
... this procedure uses some of the above functions to insert 10 rows at a time up to a given rowcount. On a modern laptop it inserts a thousand rows in a second ...
 drop procedure if exists testpop;
 delimiter go
 create procedure testpop( prows int )
 begin
   declare n bigint default 0;
   repeat
     insert into test values
       ( randint(1,100000000), randint(1,100000), randdate(), randint(1,1000000), 
         randint(1000,10000000),  randint(100,1000000), randdate() ),
       ( randint(1,100000000), randint(1,100000), randdate(), randint(1,1000000), 
         randint(1000,10000000),  randint(100,1000000), randdate() ),
       ( randint(1,100000000), randint(1,100000), randdate(), randint(1,1000000), 
         randint(1000,10000000),  randint(100,1000000), randdate() ),
       ( randint(1,100000000), randint(1,100000), randdate(), randint(1,1000000), 
         randint(1000,10000000),  randint(100,1000000), randdate() ),
       ( randint(1,100000000), randint(1,100000), randdate(), randint(1,1000000), 
         randint(1000,10000000),  randint(100,1000000), randdate() ),
       ( randint(1,100000000), randint(1,100000), randdate(), randint(1,1000000), 
         randint(1000,10000000),  randint(100,1000000), randdate() ),
       ( randint(1,100000000), randint(1,100000), randdate(), randint(1,1000000), 
         randint(1000,10000000),  randint(100,1000000), randdate() ),
       ( randint(1,100000000), randint(1,100000), randdate(), randint(1,1000000), 
         randint(1000,10000000),  randint(100,1000000), randdate() ),
       ( randint(1,100000000), randint(1,100000), randdate(), randint(1,1000000), 
         randint(1000,10000000),  randint(100,1000000), randdate() ),
       ( randint(1,100000000), randint(1,100000), randdate(), randint(1,1000000), 
         randint(1000,10000000),  randint(100,1000000), randdate() ) ;
   set n=n+10;
   until n >= prows end repeat;
 end;
 go
 delimiter ;
 call testpop(100);
 

Using a Common Table Expression

A Common Table Expression (CTE) can speed up such jobs. MySQL since 8.0 and MariaDB since 10.2 support CTEs. From the MySQL Forum, here's an example of a CTE written to generate a million rows of random temperature values between 20 and 35 and humidity values between 0 and 100 ...
drop table if exists temprand;
CREATE TABLE temprand (
  city varchar(64),
  created_at DATETIME,
  temperature TINYINT,
  humidity TINYINT UNSIGNED
);

SET @@cte_max_recursion_depth = 1000010;

INSERT INTO temprand (city, created_at, temperature, humidity)
  WITH RECURSIVE tempcte (city, created_at, temperature, humidity) AS (
    SELECT 
      'Paris, France', now() AS created_at, 
      FLOOR(20 + RAND() * (36 - 20)), FLOOR(RAND() * 101)
    UNION ALL
    SELECT 
      'Paris, France', created_at + INTERVAL 1 SECOND, 
      FLOOR(20 + RAND() * (36 - 20)), FLOOR(RAND() * 101)
    FROM tempcte
    WHERE created_at < NOW() + INTERVAL 999999 SECOND
  )
  SELECT ALL * FROM tempcte;

select * from temprand order by created_at limit 10;
select * from temprand order by created_at desc limit 10;
select min(temperature), max(temperature), min(humidity), max(humidity),count(*) 
from temprand;
On a modern laptop, it generates a million rows in about 7.5 secs.

Last updated 28 May 2022


Return to the Artful MySQL Tips page