Thinking about Data Compression

Nell’ultimo post, qui, ho parlato di come poter comprimere i dati, tramite SQLCLR, per quelle colonne che contengono informazioni che non possono essere compresse con gli strumenti nativi che SQL Server, dalla versione 2008 nelle edizioni Enterprise o Datacenter, mette a disposizione.

In questo post condivido un semplice modo per poter capire / studiare come la compressione, a livello di ROW o di PAGE, potrebbe incidere sul nostro database.

L’idea è quella di ottenere una serie di informazioni che possano fornirci le dimensioni attuali e le dimensioni che andremmo ad avere.

 

Qualcosa come:

image

Questo lo script:

 SET NOCOUNT ON;
DECLARE @option varchar( 10 );
SET @option = 'Page';
/* SET @option = 'Row' */

IF OBJECT_ID( 'tempdb..#indexDetails' )IS NOT NULL
    DROP TABLE #indexDetails;

SELECT
       c.name AS schemaName , 
       b.name AS objectName , 
       a.name AS indexName , 
       a.indid , 
       dpages , 
       CONVERT( numeric( 20 , 2 ) , ROUND( dpages * 8 / 1024.0 , 2 ))AS   dataPagesMB, 
       reserved , 
       CONVERT( numeric( 20 , 2 ) , ROUND( reserved * 8 / 1024.0 , 2 ))AS reservedMB, 
       rowcnt 
INTO #indexDetails
  FROM sys.sysindexes AS a
       INNER JOIN sysobjects AS b ON a.id = b.id
       INNER JOIN sys.schemas AS c ON b.uid = c.schema_id
  WHERE 
    b.type = 'U' AND 
    (a.name NOT LIKE '_WA_Sys%' OR a.name IS NULL) AND 
    CONVERT( numeric( 20 , 2 ) , ROUND( dpages * 8 / 1024.0 , 2 )) >= 0
  ORDER BY dpages;

IF OBJECT_ID( 'tempdb..#error' )IS NOT NULL
    DROP TABLE #error;

CREATE TABLE #error
(
    error_desc varchar( 2000 ) , 
    err_number int , 
    err_severity int , 
    err_state int , 
    err_procedure varchar( 126 ) , 
    err_line int , 
    err_message varchar( 2048 )
);

IF OBJECT_ID( 'tempdb..#dataCompressionDetails' )IS NOT NULL
        DROP TABLE #dataCompressionDetails;

CREATE TABLE #dataCompressionDetails
(
    object_name sysname , 
    schema_name sysname , 
    index_id int , 
    partition_number int , 
    size_with_current_compression_setting_KB bigint , 
    size_with_requested_compression_setting_KB bigint , 
    sample_size_with_current_compression_setting_KB bigint , 
    sample_size_with_requested_compression_setting_KB bigint 
);

DECLARE @TSql varchar( 8000 );
DECLARE myCursor CURSOR
    FOR SELECT
               'BEGIN TRY INSERT INTO #dataCompressionDetails EXEC sp_estimate_data_compression_savings     @schema_name = ''' + c.name + ''',     
            @object_name = ''' + b.name + ''',     @index_id = ' + CONVERT( varchar( 10 ) , a.indid ) + ',     @partition_number = NULL,     
            @data_compression = ''' + @option + ''' ; END TRY BEGIN CATCH INSERT INTO #error SELECT    
            ''[' + c.name + '].[' + b.name + '] failed in compression eval. The minimum row size 
            plus internal overhead exceeds the maximum allowable table row size (8060 bytes).'' AS Error_Desc        ,
            ERROR_NUMBER() AS ErrorNumber        ,ERROR_SEVERITY() AS ErrorSeverity        ,ERROR_STATE() AS ErrorState        ,
            ERROR_PROCEDURE() AS ErrorProcedure        ,ERROR_LINE() AS ErrorLine        ,ERROR_MESSAGE() AS ErrorMessage;
            END CATCH;'
          FROM sys.sysindexes AS a
               INNER JOIN sysobjects AS b ON a.id = b.id
               INNER JOIN sys.schemas AS c ON b.uid = c.schema_id
          WHERE 
          b.type = 'U' AND 
          (a.name NOT LIKE '_WA_Sys%' OR a.name IS NULL) AND 
          CONVERT( numeric( 20 , 2 ) , ROUND( dpages * 8 / 1024.0 , 2 )) >= 0
          ORDER BY
                   dpages; 
OPEN myCursor;
FETCH NEXT FROM myCursor INTO @TSql;
WHILE @@FETCH_STATUS = 0
    BEGIN
        EXEC ( @TSql );
        FETCH NEXT FROM myCursor INTO @TSql;
    END;
CLOSE myCursor;
DEALLOCATE myCursor;


/* Total saving */
SELECT
       cast(SUM( size_with_current_compression_setting_KB ) / 1024.0  as decimal(20,2)) AS                                                      [Original size MB] , 
      cast(SUM( size_with_current_compression_setting_KB ) / 1024.0 / 1024.0  as decimal(20,2)) AS                                        [Original size GB] , 
       cast(SUM( size_with_requested_compression_setting_KB ) / 1024.0  as decimal(20,2)) AS                                                    [Compression size MB] , 
      cast(SUM( size_with_requested_compression_setting_KB ) / 1024.0 / 1024.0  as decimal(20,2)) AS                                           [Compression size GB] ,
       cast(SUM( size_with_current_compression_setting_KB - size_with_requested_compression_setting_KB ) / 1024.0 as decimal(20,2)) AS          [Total Saving MB] , 
       cast(SUM( size_with_current_compression_setting_KB - size_with_requested_compression_setting_KB ) / 1024.0 / 1024.0  as decimal(20,2)) AS [Total Saving GB]
FROM #dataCompressionDetails;

/* Summary */
SELECT
       a.schemaName + '.' + a.objectName AS                                                                                          [Table] , 
       a.indexName AS                                                                                                                [Index] , 
       size_with_current_compression_setting_KB AS                                                                                   [Size current KB] , 
       size_with_requested_compression_setting_KB AS                                                                                 [Size compression KB] , 
       Compression_rate = CONVERT( numeric( 20 , 2 ) , ROUND( size_with_requested_compression_setting_KB * 1.0 / size_with_current_compression_setting_KB , 2 )) , 
       size_with_current_compression_setting_KB - size_with_requested_compression_setting_KB AS                                      [Data Compression Saving KB] , 
       CAST((size_with_current_compression_setting_KB - size_with_requested_compression_setting_KB) / 1024.0 AS numeric( 20 , 2 ))AS [Data Compression Saving MB],
      CAST((size_with_current_compression_setting_KB - size_with_requested_compression_setting_KB) / 1024.0 / 1024.0 AS numeric( 20 , 2 ))AS [Data Compression Saving GB]
  FROM #indexDetails AS a
       INNER JOIN #dataCompressionDetails AS b ON a.objectName COLLATE Latin1_General_CI_AS = b.object_name COLLATE Latin1_General_CI_AS
                                    AND a.indid = b.index_id
  WHERE CONVERT( numeric( 20 , 2 ) , ROUND( size_with_requested_compression_setting_KB * 1.0 / size_with_current_compression_setting_KB , 2 )) <= 1
    AND size_with_current_compression_setting_KB > 0
    AND CONVERT( numeric( 20 , 2 ) , ROUND( size_with_requested_compression_setting_KB * 1.0 / size_with_current_compression_setting_KB , 2 )) < 1
  ORDER BY
           size_with_current_compression_setting_KB - size_with_requested_compression_setting_KB DESC;

/* Total */
SELECT
       schemaName + '.' + objectName AS                                                                             [Table] , 
       SUM( size_current_KB )AS                                                                                     [size current KB] , 
       SUM( size_compression_KB )AS                                                                                 [Size compression KB] , 
       CONVERT( numeric( 20 , 2 ) , ROUND( SUM( size_compression_KB ) * 1.0 / SUM( size_current_KB ) * 1.0 , 2 ))AS [Compression rate] , 
       SUM( size_current_KB ) - SUM( size_compression_KB )AS                                                        [DataCompression saving KB] , 
       CAST((SUM( size_current_KB ) - SUM( size_compression_KB )) / 1024.0 AS decimal( 20 , 2 ))AS                  [DataCompression saving MB],
      CAST((SUM( size_current_KB ) - SUM( size_compression_KB )) / 1024.0 / 1024.0 AS decimal( 20 , 2 ))AS         [DataCompression saving GB]
  FROM( 
        SELECT
               a.schemaName , 
               a.objectName , 
               a.indexName , 
               size_with_current_compression_setting_KB AS                                              size_current_KB , 
               size_with_requested_compression_setting_KB AS                                            size_compression_KB , 
               Compression_rate = CONVERT( numeric( 20 , 2 ) , ROUND( size_with_requested_compression_setting_KB * 1.0 / size_with_current_compression_setting_KB , 2 )) , 
               size_with_current_compression_setting_KB - size_with_requested_compression_setting_KB AS DataCompression_Saving_KB
          FROM #indexDetails AS a
               INNER JOIN #dataCompressionDetails AS b ON a.objectName COLLATE Latin1_General_CI_AS = b.object_name COLLATE Latin1_General_CI_AS
                                            AND a.indid = b.index_id
          WHERE CONVERT( numeric( 20 , 2 ) , ROUND( size_with_requested_compression_setting_KB * 1.0 / size_with_current_compression_setting_KB , 2 )) <= 1
            AND size_with_current_compression_setting_KB > 0
            AND CONVERT( numeric( 20 , 2 ) , ROUND( size_with_requested_compression_setting_KB * 1.0 / size_with_current_compression_setting_KB , 2 )) < 1 )AS a
  GROUP BY
           schemaName , 
           objectName;

/* No compression */
SELECT * FROM #error;