Uno sguardo nello schema di ShowPlanXML: come individuare i table scans con XPath

Recentemente un cliente mi ha posto questa domanda: “come posso identificare le query ad-hoc che effettuano table scan?”

La risposta e’ semplice e consiste nel guardare il piano di esecuzione. Per esempio questa query:

 SELECT * FROM charge;
GO

Effettuata su una tabella heap senza indici risulta in un table scan, come si puo’ vedere dal piano di esecuzione:

 

Ma se volessimo farlo programmaticamente? Se analizziamo lo schema del piano d’esecuzione (pubblicato all’indirizzo http://schemas.microsoft.com/sqlserver/2004/07/showplan/) vediamo che lo schema e’ semplice:

Dove StmtBlockType e’ un complexType cosi’ definito:

 <xsd:complexType name="StmtBlockType">
     <xsd:annotation>
       <xsd:documentation>The statement block that contains many statements</xsd:documentation>
     </xsd:annotation>
     <xsd:sequence>
       <xsd:choice minOccurs="0" maxOccurs="unbounded">
         <xsd:element name="StmtSimple" type="shp:StmtSimpleType" />
         <xsd:element name="StmtCond" type="shp:StmtCondType" />
         <xsd:element name="StmtCursor" type="shp:StmtCursorType" />
         <xsd:element name="StmtReceive" type="shp:StmtReceiveType" />
         <xsd:element name="StmtUseDb" type="shp:StmtUseDbType" />
       </xsd:choice>
     </xsd:sequence>
   </xsd:complexType>
 

Per la nostra indagine ci interessano gli StmpSimpleType. Al suo interno poniamo l’attenzione su QueryPlanType: 

 <xsd:complexType name="StmtSimpleType">
    <xsd:annotation>
      <xsd:documentation>The simple statement that may or may not contain query plan, UDF plan or Stored Procedure plan </xsd:documentation>
    </xsd:annotation>
    <xsd:complexContent>
      <xsd:extension base="shp:BaseStmtInfoType">
        <xsd:sequence>
          <xsd:element name="QueryPlan" type="shp:QueryPlanType" minOccurs="0" maxOccurs="1" />
          <xsd:element name="UDF" type="shp:FunctionType" minOccurs="0" maxOccurs="unbounded" />
          <xsd:element name="StoredProc" type="shp:FunctionType" minOccurs="0" maxOccurs="1" />
        </xsd:sequence>
      </xsd:extension>
    </xsd:complexContent>
  </xsd:complexType>

Notiamo che, come atteso, possiamo avere un solo QueryPlan per StmtSimple (opzionale).

 <xsd:complexType name="QueryPlanType">
    <xsd:annotation>
      <xsd:documentation>
          New Runtime information:
            DegreeOfParallelism
         MemoryGrant (in kilobytes)
          
            New compile time information:
           mem fractions
           CachedPlanSize (in kilobytes)
           CompileTime (in milliseconds)
           CompileCPU (in milliseconds)
            CompileMemory (in kilobytes)
            Parameter values used during query compilation
          </xsd:documentation>
    </xsd:annotation>
    <xsd:sequence>
      <xsd:element name="InternalInfo" type="shp:InternalInfoType" minOccurs="0" maxOccurs="1" />
      <xsd:element name="MissingIndexes" type="shp:MissingIndexesType" minOccurs="0" maxOccurs="1" />
      <xsd:element name="GuessedSelectivity" type="shp:GuessedSelectivityType" minOccurs="0" maxOccurs="1" />
      <xsd:element name="UnmatchedIndexes" type="shp:UnmatchedIndexesType" minOccurs="0" maxOccurs="1" />
      <xsd:element name="RelOp" type="shp:RelOpType" />
      <xsd:element name="ParameterList" type="shp:ColumnReferenceListType" minOccurs="0" maxOccurs="1" />
    </xsd:sequence>
    <xsd:attribute name="DegreeOfParallelism" type="xsd:int" use="optional" />
    <xsd:attribute name="MemoryGrant" type="xsd:unsignedLong" use="optional" />
    <xsd:attribute name="CachedPlanSize" type="xsd:unsignedLong" use="optional" />
    <xsd:attribute name="CompileTime" type="xsd:unsignedLong" use="optional" />
    <xsd:attribute name="CompileCPU" type="xsd:unsignedLong" use="optional" />
    <xsd:attribute name="CompileMemory" type="xsd:unsignedLong" use="optional" />
    <xsd:attribute name="UsePlan" type="xsd:boolean" use="optional" />
  </xsd:complexType>

Il QueryPlan ha tantissime informazioni, inclusa quella che stiamo cercando: il RelOpType. Lascio a voi esplorare le decine di elementi e attributi che compongono questo tipo complesso. Basti per ora sapere che un attributo e’ quello che fa al caso nostro:

 <xsd:complexType name="RelOpType">
    <xsd:sequence>
      <xsd:element name="OutputList" type="shp:ColumnReferenceListType" />
      <xsd:element name="Warnings" type="shp:WarningsType" minOccurs="0" maxOccurs="1" />
      <xsd:element name="MemoryFractions" type="shp:MemoryFractionsType" minOccurs="0" maxOccurs="1" />
      <xsd:element name="RunTimeInformation" type="shp:RunTimeInformationType" minOccurs="0" maxOccurs="1" />
      <xsd:element name="RunTimePartitionSummary" type="shp:RunTimePartitionSummaryType" minOccurs="0" maxOccurs="1" />     
      <xsd:element name="InternalInfo" type="shp:InternalInfoType" minOccurs="0" maxOccurs="1" />
      <xsd:choice>
        <xsd:element name="Assert" type="shp:FilterType" />
        <xsd:element name="Bitmap" type="shp:BitmapType" />
        <xsd:element name="Collapse" type="shp:CollapseType" />
        <xsd:element name="ComputeScalar" type="shp:ComputeScalarType" />
        <xsd:element name="Concat" type="shp:ConcatType" />
        <xsd:element name="ConstantScan" type="shp:ConstantScanType" />
        <xsd:element name="CreateIndex" type="shp:CreateIndexType" />
        <xsd:element name="DeletedScan" type="shp:RowsetType" />
        <xsd:element name="Extension" type="shp:UDXType" />
        <xsd:element name="Filter" type="shp:FilterType" />
        <xsd:element name="Generic" type="shp:GenericType" />
        <xsd:element name="Hash" type="shp:HashType" />
        <xsd:element name="IndexScan" type="shp:IndexScanType" />
        <xsd:element name="InsertedScan" type="shp:RowsetType" />
        <xsd:element name="LogRowScan" type="shp:RelOpBaseType" />
        <xsd:element name="Merge" type="shp:MergeType" />
        <xsd:element name="MergeInterval" type="shp:SimpleIteratorOneChildType" />
        <xsd:element name="NestedLoops" type="shp:NestedLoopsType" />
        <xsd:element name="OnlineIndex" type="shp:CreateIndexType" />
        <xsd:element name="Parallelism" type="shp:ParallelismType" />
        <xsd:element name="ParameterTableScan" type="shp:RelOpBaseType" />
        <xsd:element name="PrintDataflow" type="shp:RelOpBaseType" />
        <xsd:element name="RemoteFetch" type="shp:RemoteFetchType" />
        <xsd:element name="RemoteModify" type="shp:RemoteModifyType" />
        <xsd:element name="RemoteQuery" type="shp:RemoteQueryType" />
        <xsd:element name="RemoteRange" type="shp:RemoteRangeType" />
        <xsd:element name="RemoteScan" type="shp:RemoteType" />
        <xsd:element name="RowCountSpool" type="shp:SpoolType" />
        <xsd:element name="ScalarInsert" type="shp:ScalarInsertType" />
        <xsd:element name="Segment" type="shp:SegmentType" />
        <xsd:element name="Sequence" type="shp:SequenceType" />
        <xsd:element name="SequenceProject" type="shp:ComputeScalarType" />
        <xsd:element name="SimpleUpdate" type="shp:SimpleUpdateType" />
        <xsd:element name="Sort" type="shp:SortType" />
        <xsd:element name="Split" type="shp:SplitType" />
        <xsd:element name="Spool" type="shp:SpoolType" />
        <xsd:element name="StreamAggregate" type="shp:StreamAggregateType" />
        <xsd:element name="Switch" type="shp:SwitchType" />
        <xsd:element name="TableScan" type="shp:TableScanType" />
        <xsd:element name="TableValuedFunction" type="shp:TableValuedFunctionType" />
        <xsd:element name="Top" type="shp:TopType" />
        <xsd:element name="TopSort" type="shp:TopSortType" />
        <xsd:element name="Update" type="shp:UpdateType" />
      </xsd:choice>
    </xsd:sequence>
    <xsd:attribute name="AvgRowSize" type="xsd:double" use="required" />
    <xsd:attribute name="EstimateCPU" type="xsd:double" use="required" />
    <xsd:attribute name="EstimateIO" type="xsd:double" use="required" />
    <xsd:attribute name="EstimateRebinds" type="xsd:double" use="required" />
    <xsd:attribute name="EstimateRewinds" type="xsd:double" use="required" />
    <xsd:attribute name="GroupExecuted" type="xsd:boolean" use="optional" />
    <xsd:attribute name="EstimateRows" type="xsd:double" use="required" />
    <xsd:attribute name="LogicalOp" type="shp:LogicalOpType" use="required" />
    <xsd:attribute name="NodeId" type="xsd:int" use="required" />
    <xsd:attribute name="Parallel" type="xsd:boolean" use="required" />
    <xsd:attribute name="Partitioned" type="xsd:boolean" use="optional" />
    <xsd:attribute name="PhysicalOp" type="shp:PhysicalOpType" use="required" />
    <xsd:attribute name="EstimatedTotalSubtreeCost" type="xsd:double" use="required" />
    <xsd:attribute name="TableCardinality" type="xsd:double" use="optional" />
  </xsd:complexType>

Questo tipo e’ un enumerato (o meglio, una xsd:string ristretta) che puo’ avere uno di questi valori:

     <xsd:restriction base="xsd:string">
      <xsd:enumeration value="Aggregate" />
      <xsd:enumeration value="Assert" />
      <xsd:enumeration value="Async Concat" />
      <xsd:enumeration value="Bitmap Create" />
      <xsd:enumeration value="Clustered Index Scan" />
      <xsd:enumeration value="Clustered Index Seek" />
      <xsd:enumeration value="Clustered Update" />
      <xsd:enumeration value="Collapse" />
      <xsd:enumeration value="Compute Scalar" />
      <xsd:enumeration value="Concatenation" />
      <xsd:enumeration value="Constant Scan" />
      <xsd:enumeration value="Cross Join" />
      <xsd:enumeration value="Delete" />
      <xsd:enumeration value="Deleted Scan" />
      <xsd:enumeration value="Distinct Sort" />
      <xsd:enumeration value="Distinct" />
      <xsd:enumeration value="Distribute Streams" />
      <xsd:enumeration value="Eager Spool" />
      <xsd:enumeration value="Filter" />
      <xsd:enumeration value="Flow Distinct" />
      <xsd:enumeration value="Full Outer Join" />
      <xsd:enumeration value="Gather Streams" />
      <xsd:enumeration value="Generic" />
      <xsd:enumeration value="Index Scan" />
      <xsd:enumeration value="Index Seek" />
      <xsd:enumeration value="Inner Join" />
      <xsd:enumeration value="Insert" />
      <xsd:enumeration value="Inserted Scan" />
      <xsd:enumeration value="Lazy Spool" />
      <xsd:enumeration value="Left Anti Semi Join" />
      <xsd:enumeration value="Left Outer Join" />
      <xsd:enumeration value="Left Semi Join" />
      <xsd:enumeration value="Log Row Scan" />
      <xsd:enumeration value="Merge Interval" />
      <xsd:enumeration value="Parameter Table Scan" />
      <xsd:enumeration value="Partial Aggregate" />
      <xsd:enumeration value="Print" />
      <xsd:enumeration value="Remote Delete" />
      <xsd:enumeration value="Remote Index Scan" />
      <xsd:enumeration value="Remote Index Seek" />
      <xsd:enumeration value="Remote Insert" />
      <xsd:enumeration value="Remote Query" />
      <xsd:enumeration value="Remote Scan" />
      <xsd:enumeration value="Remote Update" />
      <xsd:enumeration value="Repartition Streams" />
      <xsd:enumeration value="RID Lookup" />
      <xsd:enumeration value="Right Anti Semi Join" />
      <xsd:enumeration value="Right Outer Join" />
      <xsd:enumeration value="Right Semi Join" />
      <xsd:enumeration value="Segment" />
      <xsd:enumeration value="Sequence" />
      <xsd:enumeration value="Sort" />
      <xsd:enumeration value="Split" />
      <xsd:enumeration value="Switch" />
      <xsd:enumeration value="Table-valued function" />
      <xsd:enumeration value="Table Scan" />
      <xsd:enumeration value="Top" />
      <xsd:enumeration value="TopN Sort" />
      <xsd:enumeration value="UDX" />
      <xsd:enumeration value="Union" />
      <xsd:enumeration value="Update" />
      <xsd:enumeration value="Merge" />
    </xsd:restriction>
  </xsd:simpleType>

Ora, tornando alla nostra query, esaminiamo l’XML del piano d’esecuzione (menu contestuale -> Show execution plan XML...) qui semplificato per brevita’:

 <?xml version="1.0" encoding="utf-16"?>
<ShowPlanXML xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xsd="http://www.w3.org/2001/XMLSchema" Version="1.1" Build="10.50.2769.0" xmlns="http://schemas.microsoft.com/sqlserver/2004/07/showplan">
  <BatchSequence>
    <Batch>
      <Statements>
        <StmtSimple StatementCompId="1" StatementEstRows="100000" StatementId="1" StatementOptmLevel="TRIVIAL" StatementSubTreeCost="0.543652" StatementText="SELECT * FROM charge;&#xD;&#xA;" StatementType="SELECT" QueryHash="0x6AFC07EFC55B8B3D" QueryPlanHash="0x41C1392D57C141DB">
          <StatementSetOptions ANSI_NULLS="true" ANSI_PADDING="true" ANSI_WARNINGS="true" ARITHABORT="true" CONCAT_NULL_YIELDS_NULL="true" NUMERIC_ROUNDABORT="false" QUOTED_IDENTIFIER="true" />
          <QueryPlan DegreeOfParallelism="1" CachedPlanSize="16" CompileTime="0" CompileCPU="0" CompileMemory="72">
            <RelOp AvgRowSize="45" EstimateCPU="0.110079" EstimateIO="0.433574" EstimateRebinds="0" EstimateRewinds="0" EstimateRows="100000" LogicalOp="Table Scan" NodeId="0" Parallel="false" PhysicalOp="Table Scan" EstimatedTotalSubtreeCost="0.543652" TableCardinality="100000" />             
          </QueryPlan>
        </StmtSimple>
      </Statements>
    </Batch>
  </BatchSequence>
</ShowPlanXML>

Ora che sappiamo dove trovare l’informazione che ci serve possiamo – ad esempio – interrogare la DMV dei piani di esecuzione in cache per identificare le query cachate che hanno eseguito table scans:

 WITH 
    XMLNAMESPACES(DEFAULT 'http://schemas.microsoft.com/sqlserver/2004/07/showplan'),
    CTE AS
    (
        SELECT 
        P.plan_handle, P.cacheobjtype
        FROM sys.dm_exec_cached_plans P
        CROSS APPLY sys.dm_exec_query_plan(P.plan_handle) QP
        CROSS APPLY QP.query_plan.nodes('/ShowPlanXML/.//RelOp[@LogicalOp=''Table Scan'']') AS rp(stmt)
        GROUP BY P.plan_handle, P.cacheobjtype
    )
    SELECT A.plan_handle, A.cacheobjtype, EQP.query_plan FROM CTE A
    CROSS APPLY sys.dm_exec_query_plan(A.plan_handle) EQP

La stessa logica si puo’ adottare per identificare, ad esempio, l’utilizzo di un particolare indice o il grado di parallelismo.

Happy coding,

Francesco Cogno