Know your content

During a customer engagement I wrote a PowerShell script to analyze the content of a farm. Since such scripts are very performance-intensive, we worked on representative site collection backups in a dedicated environment.

The script iterates through all libraries (within webs, within sites) and writes item information such as "file type" or "file size" to a CSV file. Given that CSV file, we were able to analyze the content using Microsoft Excel.

In that environment we worked with 675 site collections containing 8181 webs, holding approximately 60.000 documents. In that specific environment we found that, unsurprisingly, SharePoint Collaboration Technologies were mostly used for working with Microsoft Office files. The following image shows how the document types are distributed within our content:

 

 

Please feel free to leave a comment if you want me to publish the script as well.

 

In response to the comments received, here is the script:

 

# =======================================
# PARAMETERS
# (this section is currently empty)
# =======================================

# =======================================
# CODE
# Nothing below this point is meant to be edited.
# =======================================

Clear-Host

# Load the SharePoint cmdlets; any error raised by this call is suppressed.
Add-PSSnapin -Name Microsoft.SharePoint.PowerShell -ErrorAction SilentlyContinue

# Running totals that are incremented during the analysis.
$NumProcessedDocs = 0
$NumProcessedWebs = 0

# Log a message: append it to the file named by $LogFile (defined outside this
# function) and echo the same message to the console.
#   $message - the object or text to log
function WriteMessage {
    param (
        $message
    )

    $message | Out-File -FilePath $LogFile -Append
    Write-Host -Object $message
}

# Append one line of analysis output to a result file.
#   $message - the line to append
#   $OutFile - path of the file to append to
function WriteCSVData {
    param (
        $message,
        $OutFile
    )

    $message | Out-File -FilePath $OutFile -Append
}

# Remove every tab character from a string so the value can be embedded in a
# tab-separated output line without breaking the column layout.
#   $tmp    - the text to sanitize
#   returns - a single string: $tmp with all tab characters removed
function CleanString {
    param ([String]$tmp)

    # Emit exactly one value. Previously the cleaned text was written to the
    # pipeline by a bare expression and the *uncleaned* input was returned as
    # well, so callers received two values instead of one sanitized string.
    # The debugging echo of the input to the console was dropped too.
    return ($tmp -replace "`t", "")
}
 
# Recursively analyze one web and all of its sub-webs.
#
# Sub-webs are processed first (depth-first). Then, for every non-hidden
# document library of the web whose title is not on the exclusion list, one
# tab-separated line per item is appended to the file named by $CSVFileItems.
#
#   $URL - URL of the web to analyze
#
# Reads:   $CSVFileItems (result file path, defined outside this function)
# Updates: $Script:NumProcessedWebs, $Script:NumProcessedDocs
# Logs progress through WriteMessage and writes rows through WriteCSVData.
function Analyze-Web {
    param ($URL)

    $web = Get-SPWeb $URL -ea SilentlyContinue

    # Test the object itself. The previous check passed the web object to
    # [string]::IsNullOrEmpty, which tests its string conversion instead
    # (NOTE(review): a web converting to an empty string would have been
    # reported as "not found" - confirm what SPWeb converts to).
    if ($null -eq $web) {
        WriteMessage ("{0} not found." -f $URL)
        return
    }

    try {

        # Handle recursion
        # -------------------
        foreach ($OneSubWeb in $web.Webs) {
            try {
                WriteMessage "################### DIVING INTO RECURSION #######################"
                WriteMessage ("URL: {0}" -f $OneSubWeb.Url)
                Analyze-Web $OneSubWeb.Url
            } finally {
                # The recursive call opens its own object from the URL, so the
                # enumerated sub-web object can be released right away.
                $OneSubWeb.Dispose()
            }
        } # foreach

        WriteMessage ("Examining Web with URL '{0}' with template '{1}' and Configuration '{2}'" -f $web.URL, $web.WebTemplate, $Web.Configuration)
        WriteMessage ("This web has {0} subwebs." -f $web.Webs.Count)
        $Script:NumProcessedWebs++
        WriteMessage ("Web Number: {0}" -f $Script:NumProcessedWebs)

        # Analyze items
        # ---------------

        # Libraries whose title matches one of these -like patterns are skipped.
        $ExcludedTitlePatterns = @(
            "cache",
            "Style Library",
            "*nkatalog",
            "Webpartkatalog",
            "Site Pages",
            "Gestaltungsvorlagenkatalog"
        )

        $web.GetListsOfType([Microsoft.SharePoint.SPBaseType]::DocumentLibrary) | ? {$_.Hidden -eq $False} | % {
            # Keep a named reference; $_ is easy to shadow inside nested blocks.
            $library = $_

            $isExcluded = $false
            foreach ($pattern in $ExcludedTitlePatterns) {
                if ($library.Title -like $pattern) {
                    $isExcluded = $true
                    break
                }
            }

            if (-not $isExcluded) {

                foreach ($listItem in $library.Items) {

                    # Prepare data to be dumped
                    # ------------------------------------
                    # The item's own file object supplies both the size and the
                    # check-out type, so no second lookup by URL is needed.
                    $file = $listItem.File
                    $fileSizeInKB = $file.Length / 1024

                    # Reset per item: otherwise a name without a dot would
                    # silently inherit the extension of the previous item.
                    $itemExtension = ""
                    $lastDot = $listItem.Name.LastIndexOf(".")
                    if ($lastDot -gt 0) {
                        $itemExtension = $listItem.Name.Substring($lastDot + 1)
                    }

                    # dump data
                    # ------------------------------------
                    # Tabs are stripped from free-text values because the tab
                    # is the column separator of the output file.
                    $row = "{0}`t{1}`t{2}`t{3}`t{4}`t{5}`t{6}`t{7}`t{8}`t{9}`t{10}`t{11}" -f `
                        $web.Url, ($library.Title -replace "`t",""), $listItem.Url, ($listItem.Name -replace "`t",""), ($listItem.Title -replace "`t",""), $itemExtension, ("{0:N2}" -f $fileSizeInKB),`
                        $listItem["Created"], $listItem["Created_x0020_By"], $listItem["Modified"], $listItem["Modified_x0020_By"], $file.CheckOutType
                    WriteCSVData $row $CSVFileItems
                    $script:NumProcessedDocs++

                } # foreach $listItem

            } # if

        } # Libraries

    } finally {
        # Release the web even when the analysis above throws.
        $web.Dispose()
    }
} # function Analyze-Web

# Main script body: prepare the output files, let the user choose a web
# application, analyze every site collection in it and log a summary.

# Create the names of the log file and the result file in the current path.
# "HH" is the 24-hour clock; the former "hh" (12-hour clock) could produce the
# same file name for a morning run and an afternoon run.
$TimeStamp = (Get-Date -format "yyyyMMdd-HHmmss")
$LogFile = ("AnalyzeSharePointContent_{0}.log" -f $TimeStamp)
$CSVFileItems = ("AnalyzeSharePointContent_{0}.csv" -f $TimeStamp)

$NumWebApps = 0
$NumSiteColls = 0

# Header line of the tab-separated result file
WriteCSVData "URL`tListTitle`tItemURL`tItemName`tItemTitle`tExtension`tFileSize`tCreated`tCreatedBy`tModified`tModifiedBy`tCheckOutType" $CSVFileItems

Start-SPAssignment -global

try {
    # List the available web applications so the user can copy an Id
    Get-SPWebApplication | fl Id, Url, DisplayName

    WriteMessage "Select 'Id' of web application to analyze (copy & paste from above): "
    # Trim stray whitespace picked up by copy & paste
    $WebAppId = (Read-Host "Id: ").Trim()

    WriteMessage ("Selected WebApplication-ID: {0}" -f $WebAppId)

    $Time = Measure-Command {
        $WebApp = Get-SPWebApplication | ? {$_.Id -eq $WebAppId }

        if ($null -eq $WebApp) {
            # Without this guard the script would continue with an empty
            # web application and report a misleading run.
            WriteMessage ("No web application found with Id '{0}'." -f $WebAppId)
        } else {
            WriteMessage "*****************************************************"
            WriteMessage ("WebApplication: '{0}'" -f $WebApp.Name)
            WriteMessage "*****************************************************"
            $WebApp.Sites | % {
                $SiteColl = $_
                try {
                    $NumSitecolls++
                    WriteMessage (">>> SiteCollection: '{0}'" -f $SiteColl.Url)

                    Analyze-Web $SiteColl.Url
                } finally {
                    # Only the URL of the enumerated site collection is used,
                    # so the object can be released after each iteration.
                    $SiteColl.Dispose()
                }
            }
        }
    }
} finally {
    # End the global assignment even when the analysis fails.
    Stop-SPAssignment -global
}

# Summary
WriteMessage $Time
WriteMessage ("Processed number of site collections: {0}" -f $NumSiteColls)
WriteMessage ("Processed number of webs: {0}" -f $NumProcessedWebs)
WriteMessage ("Processed number of documents: {0}" -f $NumProcessedDocs)