DELIMITER $$ CREATE DEFINER=`root`@`localhost` PROCEDURE `content`(IN `db` VARCHAR(64) CHARSET utf8, IN `tbl` VARCHAR(64) CHARSET utf8, IN `dir` VARCHAR(256) CHARSET utf8) BEGIN SET @cnt_num = 0; SET @cnt_tot = 0; /* create reporting table*/ EXECUTE IMMEDIATE CONCAT ('CREATE DATABASE IF NOT EXISTS ',db,'_profile'); EXECUTE IMMEDIATE CONCAT ('DROP TABLE IF EXISTS ',db,'_profile.',tbl,'_content'); EXECUTE IMMEDIATE CONCAT( 'CREATE TABLE ',db,'_profile.',tbl,'_content ( Id BIGINT NOT NULL AUTO_INCREMENT, Name VARCHAR(64) NULL, Average DOUBLE NULL, Minimum VARCHAR(256) NULL, Maximum VARCHAR(256) NULL, Ranges DOUBLE NULL, CoVar DOUBLE NULL, IsNull VARCHAR(256) NULL, IsZero VARCHAR(256) NULL, IsBlank VARCHAR(256) NULL, IsDistinct VARCHAR(256) NULL, PRIMARY KEY (Id) ) ENGINE = InnoDB'); /*get total number of columns*/ SELECT COUNT(*) INTO @cnt_tot FROM information_schema.COLUMNS WHERE TABLE_SCHEMA=db AND TABLE_NAME=tbl; WHILE @cnt_num < @cnt_tot DO SET @cnt_num = @cnt_num+1; /* get column name*/ SELECT COLUMN_NAME INTO @column_name FROM information_schema.COLUMNS WHERE TABLE_SCHEMA=db AND TABLE_NAME=tbl AND ORDINAL_POSITION = @cnt_num; /* get maximum number of digits in a column */ EXECUTE IMMEDIATE CONCAT( 'SELECT MAX(LENGTH(TRIM(TRAILING \'0\' FROM TRIM(LEADING \'.\' FROM REGEXP_SUBSTR(',@column_name,', \'[.][0-9]*$\'))))) INTO @round FROM ',db,'.',tbl ); /* get measures of central tendency and dispersion*/ EXECUTE IMMEDIATE CONCAT( 'SELECT COUNT(*), IF(MAX(',@column_name,' REGEXP \'^[0-9.]*$\')=0, NULL, ROUND(AVG(CAST(',@column_name,' AS DOUBLE)), IF(@round<1,1,@round))), IF(MAX(',@column_name,' REGEXP \'^[0-9.]*$\')=0, MIN(',@column_name,'), MIN(CAST(',@column_name,' AS DOUBLE))), IF(MAX(',@column_name,' REGEXP \'^[0-9.]*$\')=0, MAX(',@column_name,'), MAX(CAST(',@column_name,' AS DOUBLE))), IF(MAX(',@column_name,' REGEXP \'^[0-9.]*$\')=0, NULL, ROUND((MAX(CAST(',@column_name,' AS DOUBLE))-MIN(CAST(',@column_name,' AS DOUBLE))), IF(@round<1,1,@round))), IF(MAX(',@column_name,' REGEXP \'^[0-9.]*$\')=0, NULL, STDDEV_POP(CAST(',@column_name,' AS DOUBLE))) INTO @n, @average, @minimum, @maximum, @ranges, @sd FROM ',db,'.',tbl ); /* get number and percentage of NULL, zero and blank values*/ EXECUTE IMMEDIATE CONCAT('SELECT COUNT(*) INTO @isnull FROM ',db,'.',tbl,' WHERE ISNULL(',@column_name,')'); SET @isnullperc = ROUND(((@isnull * 100) /@n),1); EXECUTE IMMEDIATE CONCAT('SELECT COUNT(*) INTO @iszero FROM ',db,'.',tbl,' WHERE !ISNULL(',@column_name,') AND ',@column_name,'=\'0\' AND LENGTH(TRIM(',@column_name,'))>0'); SET @iszeroperc = ROUND(((@iszero * 100) /@n),1); EXECUTE IMMEDIATE CONCAT('SELECT COUNT(*) INTO @isblank FROM ',db,'.',tbl,' WHERE !ISNULL(',@column_name,') AND LENGTH(TRIM(',@column_name,'))=0'); SET @isblankperc = ROUND(((@isblank * 100) / @n),1); /* get number of distinct occurrences*/ EXECUTE IMMEDIATE CONCAT('SELECT COUNT(DISTINCT(CAST(',@column_name,' AS CHAR CHARACTER SET utf8) COLLATE utf8_bin)) INTO @isdistinct FROM ',db,'.',tbl); SET @isdistinctperc = ROUND(((@isdistinct * 100) /@n),1); SET @covar = ROUND(((@sd * 100) / @average),1); /*write result into reporting table*/ EXECUTE IMMEDIATE CONCAT( 'INSERT INTO ',db,'_profile.',tbl,'_content (Name, Average, Minimum, Maximum, Ranges, CoVar, IsNull, IsZero, IsBlank, IsDistinct) VALUES (@column_name, @average, @minimum, @maximum, @ranges, @covar, CONCAT(@isnull, \' (\' , @isnullperc, \'%)\'), CONCAT(@iszero, \' (\' , @iszeroperc, \'%)\'), CONCAT(@isblank, \' (\' , @isblankperc, \'%)\'), CONCAT(@isdistinct, \' (\' , @isdistinctperc, \'%)\'))' ); END WHILE; /*export reporting file*/ SET @file = CONCAT(REPLACE(dir,'\\','\\\\'),CONCAT(tbl,'_content_',REPLACE(NOW(),':',''),'.csv')); EXECUTE IMMEDIATE CONCAT('SELECT * INTO OUTFILE \'',@file,'\' FIELDS TERMINATED BY \';\' OPTIONALLY ENCLOSED BY \'\"\' LINES TERMINATED BY \'\\n\' FROM ',db,'_profile.',tbl,'_content'); END$$ DELIMITER ;