# Here we have a little fun pulling down HTML renditions of files using the File Analysis Suite and then encrypting any sensitive data found in each file using Voltage Format-Preserving Encryption. This is all wrapped into a custom workbook activity, allowing you to generate a "protected" rendition for a set of files in a streamlined fashion.
# Encrypt Text in HTML Rendition of File with Voltage SDA
# 02/02/2023 - Patrick Johnson
# ***Custom workbook activity to retrieve HTML renditions and encrypt text in rendition with Voltage SDA***
# 1) Checks for a FAS custom activity called "encrypt-in-file"
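# 2) Downloads an HTML view rendition of each file in the workbook. It checks the file's collection status: if the file has been collected, the near-native preview view is used for a better HTML rendition; if only its content is available, the text-only preview is used; metadata-only files are skipped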
# 3) Retrieves the privacy metadata for the file
# 4) Calls Voltage FPE Protect for each privacy metadata attribute using the "Auto" Format
# 5) Performs a search and replace on the HTML file, replacing the original privacy metadata attribute with the corresponding Voltage FPE response
# 6) Saves the protected HTML file to $outputDir, or next to the original source file when $copyFileToSourcePath is set to $true
# NOTE: You must have access to the file share repository where the files are located when running this script with the bool $copyFileToSourcePath set to $true.
#Prompt for the password without echoing it to the console.
#$pass is still exposed as a plain string afterwards because the login request body needs the clear-text value.
$securePass = Read-Host 'What is your password?' -AsSecureString
$pass = (New-Object System.Net.NetworkCredential('', $securePass)).Password
$outputDir = "C:\Protected_Output\"
$tempDir = "C:\Protected_Output\Temp\"
$voltageSDA = "https://voltage-pp-0000.YOUR_VOLTAGESDA_GOES_HERE/vibesimple/rest/v1/protect"
#Setting the below value to true writes each protected rendition next to the original file on the source fileshare instead of into $outputDir
[bool]$copyFileToSourcePath = $false
#Make sure the output and temp folders exist before anything is written to them
foreach ($requiredDir in @($outputDir, $tempDir))
{
    if (-not (Test-Path -LiteralPath $requiredDir -PathType Container))
    {
        New-Item -ItemType Directory -Path $requiredDir -Force | Out-Null
    }
}
#Authentication details
#Built with ConvertTo-Json so that quotes or backslashes in the password cannot break the JSON document
$auth = [ordered]@{
    "tenantId" = "YOUR_TENANT"
    "user"     = "YOUR_USERNAME"
    "password" = $pass
} | ConvertTo-Json
$headerA = @{
"Content-Type"="application/json"
}
[Net.ServicePointManager]::SecurityProtocol = [Net.SecurityProtocolType]::Tls12
#Get token via post
#The response variable is cleared first so a failed call cannot leave a value from an earlier run in place
$loginResponse = $null
#The body is sent as UTF-8 bytes so the cmdlet does not re-encode non-ASCII characters with its own default charset
$loginResponse = Invoke-RestMethod -Uri "https://www.demo.microfocusfileanalysis.com:9310/v1/auth/login" -Method 'Post' -Body ([System.Text.Encoding]::UTF8.GetBytes($auth)) -Headers $headerA -UseBasicParsing
if (-not $loginResponse -or -not $loginResponse.accessToken)
{
    #Without a token every later call would fail, so stop here with a clear message
    throw "Login to File Analysis Suite failed: no access token was returned."
}
$token = $loginResponse.accessToken.ToString()
#Write-Output "Access token is $token"
#Find the custom workbook activities requiring processing in the "encrypt-in-file" type
$params0 = @{
Uri = 'https://www.demo.microfocusfileanalysis.com:9310/ca/v1/workspace/custom-activity-workbooks/encrypt-in-file?processingStatus=Pending'
Headers = @{ 'Authorization' = "Bearer $token" }
Method = 'GET'
ContentType = 'application/json'}
#Execute Rest call
$custActResponse = $null
$custActResponse = Invoke-RestMethod @params0 -UseBasicParsing
#@() makes the count correct for zero, one or many returned workbooks
$pendingWorkbookCount = @($custActResponse).Count
Write-Output "Found workbooks for external processing" $pendingWorkbookCount.ToString()
#Main processing loop: for every pending "encrypt-in-file" workbook, download an HTML rendition of each
#document, protect the privacy entities found in it with Voltage FPE, write the protected rendition to disk
#and report the outcome back to the File Analysis Suite (FAS).
#Reads from the preceding script: $custActResponse, $token, $voltageSDA, $tempDir, $outputDir, $copyFileToSourcePath.

#Base address of the FAS REST API used by every FAS call below
$fasBaseUrl = "https://www.demo.microfocusfileanalysis.com:9310"
#Headers for the JSON calls to FAS (status updates, document listing, privacy metadata)
$header = @{
    "Accept"="application/json"
    "Authorization"="Bearer $token"
    "Content-Type"="application/json"
}
#Headers for downloading the HTML view rendition of a document
$viewHeader = @{
    "Accept"="application/octet-stream"
    "Authorization"="Bearer $token"
    "Content-Type"="application/x-www-form-urlencoded"
}
#Headers for the Voltage SDA protect call (identical for every value, so built once)
#NOTE(review): the shared secret and identity are hard-coded in this script - consider loading them from a secure store or prompting for them
$SDAheaders = @{
    "Content-Type"="application/json"
    "accept"="application/json"
    "Authorization"='VSAuth vsauth_method="sharedSecret",vsauth_data="ffej333jhf8*&2nf",vsauth_identity_ascii="secgov@theorypie.com",vsauth_version="200"'
}

foreach ($workbook in $custActResponse)
{
    $workbookID = $workbook.id.ToString()
    $workspaceID = $workbook.workspaceId.ToString()
    $activityUrl = "$fasBaseUrl/ca/v1/workspace/$workspaceID/workbook/$workbookID/custom-activity/encrypt-in-file"
    Write-Output "Processing Workbook $workbookID"

    Write-Output "Call to set status to processing"
    $status = [ordered]@{
        "status"="Processing"
        "successCount"=0
        "errorCount"=0
    } | ConvertTo-Json
    #Call to set status to processing
    Invoke-RestMethod -Uri $activityUrl -Method 'Put' -body $status -Headers $header -UseBasicParsing

    #Now get the list of files in the workbook
    #Cleared first so a failed call cannot make this workbook re-process the previous workbook's documents
    $searchResponse = $null
    $searchResponse = Invoke-RestMethod -Uri "$fasBaseUrl/ca/v1/workspace/$workspaceID/workbook/$workbookID/documents?queryFields=collection_status%2Cfile_ext&limit=1000" -Method 'Get' -Headers $header -UseBasicParsing

    #Per-workbook tallies reported back to FAS once all documents have been attempted
    $successCount = 0
    $errorCount = 0

    foreach ($document in $searchResponse.documentMetadataList)
    {
        $docID = $null
        try
        {
            Write-Output "Processing file: " $document.metadata.title
            Write-Output "Collection Status: " $document.metadata.collection_status
            #Here is the file path
            Write-Output $document.metadata.filepath
            $docID = $document.documentId.ToString()
            Write-Output $docID

            #Check Collection Status and establish View URL as near native view or plain text
            if ($document.metadata.collection_status -eq "COLLECTED")
            {
                $viewUrl = "$fasBaseUrl/v1/view/preview?docId=$docID&maskData=false"
                Write-Output "Document Collected - Retrieving Near Native HTML Rendition"
                Write-Output "View URL: " $viewUrl
            }
            elseif ($document.metadata.collection_status -eq "CONTENT")
            {
                $viewUrl = "$fasBaseUrl/v1/view/preview-text-content?docId=$docID&maskData=false"
                Write-Output "Document Not Collected - Retrieving Text-Only HTML Rendition"
                Write-Output "View URL: " $viewUrl
            }
            else
            {
                #With MetadataOnly Documents, we skip the document since no HTML rendition is available.
                #No protected rendition is produced, so it is reported as an error and not as a success.
                Write-Output "Current file is metadata only and cannot generate content or collected view"
                $errorCount++
                continue
            }

            $viewAnswer = Invoke-RestMethod -Uri $viewUrl -Method 'Post' -Headers $viewHeader
            $viewTempFile = [System.IO.Path]::Combine($tempDir, ($docID + "-" + "tempview.html"))
            Write-Output "Temp View File: " $viewTempFile
            #UTF-8 keeps non-ASCII characters intact (ASCII would turn them into '?', so entity values containing
            #them could never be found), and no -Width is given so long HTML lines are not cut short
            Out-File -FilePath $viewTempFile -InputObject $viewAnswer -Encoding UTF8 -ErrorAction Stop

            #Get source file path
            $sourceFile = [System.IO.FileInfo]($document.metadata.filepath)
            $encryptedFileName = $sourceFile.BaseName + "-encrypted" + ".html"
            #Path.Combine is used instead of Join-Path because it does not require the source drive/share to exist locally
            $originalDestination_file = [System.IO.Path]::Combine($sourceFile.DirectoryName, $encryptedFileName) #Destination when storing with original source file
            $outputEncrypted_File = [System.IO.Path]::Combine($outputDir, ($docID + "-" + $encryptedFileName)) #Destination when outputting to the output path

            #Get contents of HTML rendition for later use when performing search&replace with FPE responses
            $fileText = Get-Content -LiteralPath $viewTempFile -Raw -Encoding UTF8 -ErrorAction Stop

            #Now get the list of privacy metadata for the current file
            $metadataResponse = Invoke-RestMethod -Uri "$fasBaseUrl/research/v1/document/$docID/privacy-metadata?maskData=false" -Method 'Post' -Headers $header -UseBasicParsing

            #Collect every entity value with a confidence of over 50% that we should protect
            $valuesToProtect = New-Object 'System.Collections.Generic.List[string]'
            foreach ($grammar in $metadataResponse.grammarMetadata)
            {
                Write-Output $grammar.name
                foreach ($entity in $grammar.entities)
                {
                    Write-Output $entity.name
                    foreach ($entityValue in $entity.entityValues)
                    {
                        Write-Output $entityValue.name
                        #Blank values are skipped: an empty search pattern would match at every position of the text
                        if (($entityValue.score -gt 0.5) -and (-not [string]::IsNullOrWhiteSpace([string]$entityValue.name)))
                        {
                            $valuesToProtect.Add([string]$entityValue.name)
                        }
                    }
                }
            }

            #Protect each distinct value once, longest first, so a short value that is part of a longer one
            #(e.g. a first name inside a full name) does not break up the longer match
            $orderedValues = @($valuesToProtect | Sort-Object -Unique | Sort-Object -Property Length -Descending)
            foreach ($plainValue in $orderedValues)
            {
                Write-Output $plainValue
                #ConvertTo-Json escapes quotes and backslashes in the value so the request stays valid JSON
                $SDAbody = [ordered]@{ "format" = "AUTO"; "data" = @($plainValue) } | ConvertTo-Json -Compress
                Write-Output $SDAbody
                #Sent as UTF-8 bytes so non-ASCII values are not re-encoded with the cmdlet's default charset
                $maskedtext = Invoke-RestMethod -Method Post -Uri $voltageSDA -Headers $SDAheaders -Body ([System.Text.Encoding]::UTF8.GetBytes($SDAbody))
                Write-Output $maskedtext.data
                $protectedValue = [string](@($maskedtext.data)[0])
                if ([string]::IsNullOrEmpty($protectedValue))
                {
                    #Replacing the original with an empty value would silently destroy data
                    throw "Voltage SDA returned no protected value for an entity in document $docID"
                }
                #'$' is doubled because -replace treats '$' in the replacement string as a group reference
                $interrogativeMaskedtext = ("¿¿" + $protectedValue + "??").Replace('$', '$$')
                $fileText = $fileText -replace ([regex]::Escape($plainValue)), $interrogativeMaskedtext
            }

            Write-Output "Masked up text"
            Write-Output "=============="
            Write-Output $fileText

            #Write marked up text to a new file, either in the same path as the source file or in the output path
            if ($copyFileToSourcePath)
            {
                $destinationFile = $originalDestination_file
            }
            else
            {
                $destinationFile = $outputEncrypted_File
            }
            Set-Content -LiteralPath $destinationFile -Value $fileText -Encoding UTF8 -ErrorAction Stop
            $successCount++
        }
        catch
        {
            #A failure on one document must not stop the workbook or leak state into the next document
            $errorCount++
            Write-Warning "Failed to protect document ${docID}: $($_.Exception.Message)"
        }
        #Cleanup temporary view file
        #NOTE(review): the temporary view file holds the unprotected rendition; enable the line below to delete it
        #remove-item $viewTempFile
    }

    Write-Output "Call to set status to completed"
    $status = [ordered]@{
        "status"="Completed"
        "successCount"=$successCount
        "errorCount"=$errorCount
    } | ConvertTo-Json
    #Call to set status to completed
    Invoke-RestMethod -Uri $activityUrl -Method 'Put' -body $status -Headers $header -UseBasicParsing
}