Open the file C:\Program Files\Microsoft Integration Runtime\5.0\PowerShellScript\RegisterIntegrationRuntime.ps1 and modify the parameters.
Then execute it (with the self-hosted integration runtime agent enabled!)
| CREATE MASTER KEY ENCRYPTION BY PASSWORD='dwdweoifjidw$_fwefjwoifj%%123'; | |
| CREATE DATABASE SCOPED CREDENTIAL ServiceIdentity WITH IDENTITY = 'Managed Identity'; | |
| CREATE schema stg | |
| GO | |
| CREATE SCHEMA silver | |
| GO |
| ADFActivityRun | |
| | where ActivityType == 'Copy' | |
| | where Status == 'Succeeded' | |
| | where EffectiveIntegrationRuntime == 'IRAzureVM' | |
| | extend d = parse_json(Output) | |
| | project TimeGenerated, CorrelationId, d.rowsCopied, ActivityRunId, PipelineRunId, PipelineName |
| DECLARE @TableName NVARCHAR(128) | |
| DECLARE @SQL NVARCHAR(500) | |
| DECLARE TableCursor CURSOR FOR | |
| SELECT TABLE_NAME | |
| FROM INFORMATION_SCHEMA.TABLES | |
| WHERE TABLE_TYPE = 'BASE TABLE' | |
| OPEN TableCursor | |
| FETCH NEXT FROM TableCursor INTO @TableName |
| def addStructure(df, field, structFields): | |
| ''' | |
| Receive a df, a field and structure in a dictionary. | |
| ''' | |
| # check whether all the values are null, because when they are, replacing null with a structure does not work. | |
| validator = df.select(df[field]).distinct() | |
| if validator.schema[field].dataType.typeName() == 'string': | |
| otherwiseField = None | |
| else: | |
| otherwiseField = df[field] |
| # reads a csv | |
| def read_csv_from_rown_number(path, row_number = 1, sep=';', schema = None, ): | |
| # load the csv as raw text and add an Id column | |
| df = spark.read.text(path) | |
| df = df.withColumn('idRowNbField', f.monotonically_increasing_id()) | |
| df = df.where('idRowNbField >= %s' % row_number) | |
| #extract header and clean header | |
| header = df.limit(1).select('value').collect()[0] | |
| header_list= list(header.asDict()['value'].split(sep)) |
| # First run "Connect-AzAccount" to set up AAD user authentication | |
| $filesystemName = 'storageFilesystem' | |
| $path = 'folder' | |
| $storageName = 'storagName' | |
| $ctx = New-AzStorageContext -StorageAccountName $storageName -UseConnectedAccount | |
| $Files = Get-AzDataLakeGen2ChildItem -Context $ctx -FileSystem $filesystemName -Path $path -Recurse | Where-Object IsDirectory -eq $false |
| def mountContainer(appId, secret, tenantId, storageName, containerName, mountPoint): | |
| configs = {"fs.azure.account.auth.type": "OAuth", | |
| "fs.azure.account.oauth.provider.type": "org.apache.hadoop.fs.azurebfs.oauth2.ClientCredsTokenProvider", | |
| "fs.azure.account.oauth2.client.id": appId, | |
| "fs.azure.account.oauth2.client.secret": secret, | |
| "fs.azure.account.oauth2.client.endpoint": f"https://login.microsoftonline.com/{tenantId}/oauth2/token"} | |
| # Optionally, you can add <directory-name> to the source URI of your mount point. | |
| dbutils.fs.mount( | |
| source = f"abfss://{containerName}@{storageName}.dfs.core.windows.net/", |
| # Task to list files in dir... | |
| - script: | | |
| tree | |
| workingDirectory: $(Agent.BuildDirectory)\$(artifactPipe) |
| cd C:\Users\joan.teixido\AppData\Local\Packages\CanonicalGroupLimited.Ubuntu20.04onWindows_79rhkp1fndgsc\LocalState | |
| wsl --shutdown | |
| optimize-vhd -Path .\ext4.vhdx -Mode full | |
| cd C:\Users\joan.teixido\AppData\Local\Docker\wsl\data | |
| optimize-vhd -Path .\ext4.vhdx -Mode full |