forked from JocaPC/synapse-sql-utilities
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathserverless-sql-utilities.sql
451 lines (417 loc) · 17.9 KB
/
serverless-sql-utilities.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
--------------------------------------------------------------------------------
-- Synapse serverless SQL pool - Query Performance Insights
-- Author: Jovan Popovic
--------------------------------------------------------------------------------
SET QUOTED_IDENTIFIER OFF; -- Because I use "" as a string literal
GO
IF SCHEMA_ID('qpi') IS NULL
EXEC ('CREATE SCHEMA qpi');
GO
CREATE OR ALTER VIEW qpi.queries
AS
SELECT
text = IIF(LEFT(text,1) = '(', TRIM(')' FROM SUBSTRING( text, (PATINDEX( '%)[^),]%', text))+1, LEN(text))), text) ,
params = IIF(LEFT(text,1) = '(', SUBSTRING( text, 2, (PATINDEX( '%)[^),]%', text+')'))-2), '') ,
execution_type_desc = status COLLATE Latin1_General_CS_AS,
first_execution_time = start_time, last_execution_time = NULL, count_executions = NULL,
elapsed_time_s = total_elapsed_time /1000.0,
cpu_time_s = cpu_time /1000.0,
logical_io_reads = logical_reads,
logical_io_writes = writes,
physical_io_reads = reads,
num_physical_io_reads = NULL,
clr_time = NULL,
dop,
row_count,
memory_mb = granted_query_memory *8 /1000,
log_bytes = NULL,
tempdb_space = NULL,
query_text_id = NULL, query_id = NULL, plan_id = NULL,
database_id, connection_id, session_id, request_id, command,
interval_mi = null,
start_time,
end_time = null,
sql_handle
FROM sys.dm_exec_requests
CROSS APPLY sys.dm_exec_sql_text(sql_handle)
WHERE session_id <> @@SPID
GO
CREATE OR ALTER VIEW qpi.query_history
AS
SELECT query_text_id = query_hash,
request_id = distributed_statement_id,
elapsed_time_s = total_elapsed_time_ms /1000.,
query_text = CASE query_text
WHEN '*** Internal delta query ***' THEN 'Scanning Delta transaction log...'
WHEN '*** Global stats query ***' THEN 'Collecting file statistics...'
WHEN '*** External table stats query ***' THEN 'Collecting file statistics...'
ELSE query_text END,
data_processed_mb = data_processed_mb,
start_time, end_time,
transaction_id,
status,
error, error_code
FROM sys.dm_exec_requests_history
GO
CREATE OR ALTER
FUNCTION qpi.cmp_queries (@request_id1 varchar(40), @request_id2 varchar(40))
returns table
return (
select property = a.[key], a.value query_1, b.value query_2
from
(select [key], value
from openjson(
(select *
from qpi.query_history
where request_id = @request_id1
for json path, without_array_wrapper)
)) as a ([key], value)
join
(select [key], value
from openjson(
(select *
from qpi.query_history
where request_id = @request_id2
for json path, without_array_wrapper)
)) as b ([key], value)
on a.[key] = b.[key]
where a.value <> b.value
);
go
CREATE OR ALTER VIEW qpi.recommendations
AS
with sql_definition as (
select
object_id,
format_type = CASE
WHEN UPPER(m.definition) LIKE '%''PARQUET''%' THEN 'PARQUET'
WHEN UPPER(m.definition) LIKE '%''DELTA''%' THEN 'DELTA'
WHEN UPPER(m.definition) LIKE '%''CSV''%' THEN 'CSV'
WHEN UPPER(m.definition) LIKE '%''COSMOSDB''%' THEN 'COSMOSDB'
WHEN UPPER(m.definition) NOT LIKE '%''PARQUET''%'
AND (UPPER(m.definition) NOT LIKE '%''CSV''%' )
AND (UPPER(m.definition) NOT LIKE '%''DELTA''%' )
AND (UPPER(m.definition) NOT LIKE '%''COSMOSDB''%' )
THEN 'COMPOSITE'
ELSE 'MIXED'
END
from sys.sql_modules m
),
bulkpath as (
select schema_name = schema_name(v.schema_id), v.name, val =TRIM(SUBSTRING( LOWER(m.definition) , PATINDEX('%bulk%', LOWER(m.definition)), 2048)), m.definition
from sys.views v
join sys.sql_modules m on v.object_id = m.object_id
where PATINDEX('%bulk%', LOWER(m.definition)) > 0
and schema_name(v.schema_id) <> 'qpi'
),
view_path as (
select name,
schema_name,
path = SUBSTRING(val,
CHARINDEX('''', val, 0)+1,
(CHARINDEX('''', val, CHARINDEX('''', val, 0)+1) - CHARINDEX('''', val, 0) - 1))
from bulkpath
where CHARINDEX('''', val, 0) > 0
and schema_name <> 'qpi'
),
recommendations as (
select name = 'USE VARCHAR UTF-8 TYPE',
score = 1.0,
schema_name = schema_name(v.schema_id),
object = v.name,
column_name = c.name,
reason = CONCAT('The view ', v.name, ' that is created on ', m.format_type,' dataset has ', count(c.column_id), ' columns with ') +
IIF( t.name = 'nchar', 'NVARCHAR/NCHAR type.', 'VARCHAR/CHAR type without UTF-8 collation.') +
' You might get conversion error.' +
' Change the column types to VARCHAR with some UTF8 collation.'
from sys.views as v join sys.columns as c on v.object_id = c.object_id
join sql_definition m on v.object_id = m.object_id
join sys.types t on c.user_type_id = t.user_type_id
where ( m.format_type IN ('PARQUET', 'DELTA', 'COSMOSDB', 'MIXED') )
AND ( (t.name iN ('nchar', 'nvarchar')) OR (t.name iN ('nchar', 'nvarchar') AND c.collation_name NOT LIKE '%UTF8') )
group by v.schema_id, v.name, t.name, m.format_type, c.name
union all
-- Tables on UTF-8 files with NVARCHAR/NCHAR columns or CHAR/VARCHAR without UTF8 collation:
select name = 'USE VARCHAR TYPE',
score = IIF( t.name LIKE 'n%', 0.3, 1.0),
schema_name = schema_name(e.schema_id),
object = e.name,
column_name = IIF(count(c.column_id)=1, max(c.name), CONCAT(count(c.column_id), ' columns')),
reason = CONCAT('The table "', schema_name(e.schema_id), '.', e.name, '" that is created on ', f.format_type, ' files ') +
CONCAT(IIF( f.encoding = 'UTF8', ' with UTF-8 encoding ', ''), ' has ',
IIF(count(c.column_id)=1, '"' + max(c.name) + '" column', CONCAT(count(c.column_id), ' columns') ), ' with ') +
IIF( t.name LIKE 'n%', 'NVARCHAR/NCHAR', 'VARCHAR/CHAR without UTF-8 collation.') +
' type. Change the column types to VARCHAR with some UTF8 collation.'
from sys.external_tables as e join sys.columns as c on e.object_id = c.object_id
join sys.external_file_formats f on e.file_format_id = f.file_format_id
join sys.types t on c.user_type_id = t.user_type_id
where ( (f.format_type IN ('PARQUET', 'DELTA')) OR f.encoding = 'UTF8' )
AND ( (t.name iN ('nchar', 'nvarchar')) OR (t.name iN ('nchar', 'nvarchar') AND c.collation_name NOT LIKE '%UTF8'))
group by e.schema_id, f.format_type, e.name, f.encoding , t.name, c.name
union all
-- Tables on UTF-16 files with VARCHAR/CHAR columns:
select name = 'USE NVARCHAR TYPE',
score = 1.0,
schema_name = schema_name(e.schema_id),
object = e.name,
column_name = IIF(count(c.column_id)=1, max(c.name), CONCAT(count(c.column_id), ' columns')),
reason = CONCAT('The table "', schema_name(e.schema_id), '.', e.name, '" created on CSV files with UTF16 encoding has ',
IIF(count(c.column_id)=1, '"' + max(c.name) + '" column', CONCAT(count(c.column_id), ' columns') ), ' with ') +
'VARCHAR/CHAR type. Change the column type to NVARCHAR.'
from sys.external_tables as e join sys.columns as c on e.object_id = c.object_id
join sys.external_file_formats f on e.file_format_id = f.file_format_id
join sys.types t on c.user_type_id = t.user_type_id
where (f.encoding = 'UTF16' )
AND (t.name iN ('nchar', 'nvarchar'))
group by e.schema_id, f.format_type, e.name, f.encoding , t.name
union all
select name = 'OPTIMIZE STRING FILTER',
score = case
when string_agg(c.name,',') like '%id%' then 0.9
when string_agg(c.name,',') like '%code%' then 0.9
when count(c.column_id) > 1 then 0.81
else 0.71
end,
schema_name = schema_name(v.schema_id),
object = v.name,
column_name = IIF(count(c.column_id)=1, max(c.name), CONCAT(count(c.column_id), ' columns')),
reason = CONCAT('The view "', schema_name(v.schema_id), '.', v.name, '" that is created on ', m.format_type, ' dataset has ',
IIF(count(c.column_id)=1, '"' + max(c.name) + '" column', CONCAT(count(c.column_id), ' columns') ), ' with ') +
IIF( t.name = 'nchar', 'NVARCHAR/NCHAR type.', 'VARCHAR/CHAR type without BIN2 UTF8 collation.') +
' Change the column types to VARCHAR with the Latin1_General_100_BIN2_UTF8 collation.'
from sys.views as v join sys.columns as c on v.object_id = c.object_id
join sql_definition m on v.object_id = m.object_id
join sys.types t on c.user_type_id = t.user_type_id
where ( m.format_type IN ('PARQUET', 'DELTA', 'COSMOSDB', 'MIXED') )
AND ( t.name IN ('char', 'varchar') AND c.collation_name <> 'Latin1_General_100_BIN2_UTF8' )
group by v.schema_id, v.name, t.name, m.format_type
union all
-- Tables on Parquet/Delta Lake files with the columns without BIN2 UTF-8 collation:
select name = 'OPTIMIZE STRING FILTER',
score = 0.6,
schema_name = schema_name(e.schema_id),
object = e.name,
column_name = c.name,
reason = CONCAT('The string column "', c.name, '" in table "', schema_name(t.schema_id), '.', t.name, '" doesn''t have "Latin1_General_100_BIN2_UTF8". String filter on this column are suboptimal')
from sys.external_tables as e join sys.columns as c on e.object_id = c.object_id
join sys.external_file_formats f on e.file_format_id = f.file_format_id
join sys.types t on c.user_type_id = t.user_type_id
where ( (f.format_type IN ('PARQUET', 'DELTA'))) AND t.name IN ('char', 'varchar') AND c.collation_name <> 'Latin1_General_100_BIN2_UTF8'
union all
-- Oversized string columns:
select name = 'OPTIMIZE COLUMN TYPE',
score = ROUND(0.3 + (IIF(c.max_length=-1, 0.7*12000., c.max_length)/12000.),1),
schema_name = schema_name(o.schema_id),
object = o.name,
column_name = c.name,
reason = CONCAT('The string column "', c.name, '" has a max size ',
IIF(c.max_length=-1, ' 2 GB', CAST( c.max_length AS VARCHAR(10)) + ' bytes'), '. Check could you use a column with a smaller size.',
IIF(o.type = 'U', ' Table ', ' View '), '"', schema_name(o.schema_id), '.', o.name, '"')
from sys.objects as o join sys.columns as c on o.object_id = c.object_id
join sys.types t on c.user_type_id = t.user_type_id
where t.name LIKE '%char' AND (c.max_length > 256 OR c.max_length = -1)
and o.type in ('U', 'V')
and lower(c.name) not like '%desc%'
and lower(c.name) not like '%comment%'
and lower(c.name) not like '%note%'
and lower(c.name) not like '%exception%'
and lower(c.name) not like '%reason%'
and lower(c.name) not like '%explanation%'
union all
-- Oversized key columns:
select name = 'OPTIMIZE KEY COLUMN TYPE',
score = 0.4 + ROUND((1-EXP(-IIF(c.max_length=-1, 8000., c.max_length)/8000.)),1),
schema_name = schema_name(o.schema_id),
object = o.name,
column_name = c.name,
reason = CONCAT('Are you using the column "', c.name, '" in join/filter predicates? ',
'The column type is ', t.name, '(size:',IIF(c.max_length=-1, ' 2 GB', CAST( c.max_length AS VARCHAR(10)) + ' bytes'),'). ',
'Try to use a column with a smaller type or size.')
from sys.objects as o join sys.columns as c on o.object_id = c.object_id
join sys.types t on c.user_type_id = t.user_type_id
where (c.name LIKE '%code' OR c.name LIKE '%id') AND (c.max_length > 8 OR c.max_length = -1)
and o.type in ('U', 'V')
union all
-- The tables that are referencing the same location:
select name = 'REMOVE DUPLICATE REFERENCES',
score = 0.9,
schema_name = NULL,
object = NULL,
column_name = NULL,
reason = CONCAT('The tables ', string_agg(concat('"',schema_name(e.schema_id),'.',e.name,'"'), ','), ' are referencing the same location')
from sys.external_tables e
group by data_source_id, location
having count(*) > 1
union all
-- Partitioned external table
select name = 'REPLACE TABLE WITH PARTITIONED VIEW',
score = 1.0,
schema_name = schema_name(e.schema_id),
object = e.name,
column_name = NULL,
reason = CONCAT('The table ', e.name, ' is created on a partitioned data set, but cannot leverage partition elimination. Replace it with a partitioned view.')
from sys.external_tables e
where REPLACE(location, '*.', '') like '%*%'
union all
select name = 'USE BETTER COLUMN TYPE',
score = IIF(c.max_length=-1, 1.0, 0.2 + ROUND((1-EXP(-c.max_length/50.))/2,1)),
schema_name = schema_name(o.schema_id),
object = o.name,
column_name = c.name,
reason = CONCAT('Do you need to use the type "', t.name, '(size:',IIF(c.max_length=-1, ' 2 GB', CAST( c.max_length AS VARCHAR(10)) + ' bytes'),') in column "', c.name, '" in view: "', schema_name(o.schema_id), '.', o.name, '"')
from sys.objects as o join sys.columns as c on o.object_id = c.object_id
join sys.types t on c.user_type_id = t.user_type_id
where
t.name IN ('nchar', 'nvarchar', 'char', 'varchar', 'binary', 'varbinary')
AND
( LOWER(c.name) like '%date%' OR LOWER(c.name) like '%time%'
OR LOWER(c.name) like '%guid%'
OR LOWER(c.name) like '%price%' OR LOWER(c.name) like '%amount%' )
AND
o.type in ('U', 'V')
and lower(c.name) not like '%desc%'
and lower(c.name) not like '%comment%'
and lower(c.name) not like '%note%'
and lower(c.name) not like '%exception%'
and lower(c.name) not like '%reason%'
and lower(c.name) not like '%explanation%'
union all
select name = 'REMOVE DUPLICATE REFERENCES',
score = 0.9,
schema_name = NULL,
object = NULL,
column_name = NULL,
reason = CONCAT('Views ', string_agg(concat(schema_name,'.',name), ','), ' are referencing the same path: ', path)
from view_path
group by path
having count(*) > 1
)
SELECT * FROM recommendations
WHERE schema_name <> 'qpi'
GO
CREATE OR ALTER PROCEDURE qpi.generate_cosmosdb_with_schema ( @connection nvarchar(max), @container nvarchar(1000))
AS BEGIN
DECLARE @tsql NVARCHAR(MAX)
SET @tsql = "SELECT TOP 10 *
FROM OPENROWSET(
'CosmosDB',
'"+@connection+"',
"+@container + ") as data"
create table #frs (
is_hidden bit not null,
column_ordinal int not null,
name sysname null,
is_nullable bit not null,
system_type_id int not null,
system_type_name nvarchar(256) null,
max_length smallint not null,
precision tinyint not null,
scale tinyint not null,
collation_name sysname null,
user_type_id int null,
user_type_database sysname null,
user_type_schema sysname null,
user_type_name sysname null,
assembly_qualified_type_name nvarchar(4000),
xml_collection_id int null,
xml_collection_database sysname null,
xml_collection_schema sysname null,
xml_collection_name sysname null,
is_xml_document bit not null,
is_case_sensitive bit not null,
is_fixed_length_clr_type bit not null,
source_server sysname null,
source_database sysname null,
source_schema sysname null,
source_table sysname null,
source_column sysname null,
is_identity_column bit null,
is_part_of_unique_key bit null,
is_updateable bit null,
is_computed_column bit null,
is_sparse_column_set bit null,
ordinal_in_order_by_list smallint null,
order_by_list_length smallint null,
order_by_is_descending smallint null,
tds_type_id int not null,
tds_length int not null,
tds_collation_id int null,
tds_collation_sort_id tinyint null
);
insert #frs
exec sys.sp_describe_first_result_set @tsql;
declare @with_clause nvarchar(max);
set @with_clause = (select 'WITH (' + string_agg(QUOTENAME(name) + ' ' + system_type_name, ', ') + ')' from #frs);
select
'Note:', 'This is an autogenerated schema for cosmosDB contianer. Try to optimize it and minimize the types like VARCHAR(8000)!'
union all
select 'Query:', "SELECT * FROM OPENROWSET( 'CosmosDB',
'"+@connection+"',
"+@container + ') ' + @with_clause + ' as data'
union ALL
select 'WITH clause:', @with_clause;
END
GO
SET QUOTED_IDENTIFIER OFF; -- Because I use "" as a string literal
GO
-- Creates a disgnostic view on a folder where diagnostic settings are created.
-- Example usage: exec qpi.create_diagnostics 'https://jovanpoptest.dfs.core.windows.net/insights-logs-builtinsqlreqsended/'
CREATE OR ALTER PROCEDURE qpi.create_diagnostics @path varchar(1024)
AS BEGIN
DECLARE @tsql VARCHAR(MAX);
SET @tsql = CONCAT("DROP EXTERNAL DATA SOURCE [Diagnostics];
CREATE EXTERNAL DATA SOURCE [Diagnostics] WITH ( LOCATION = '", @path, "' );");
EXEC(@tsql);
SET @tsql = "CREATE OR ALTER VIEW qpi.diagnostics
AS SELECT
subscriptionId = r.filepath(1),
resourceGroup = r.filepath(2),
workspace = r.filepath(3),
year = CAST(r.filepath(4) AS SMALLINT),
month = CAST(r.filepath(5) AS TINYINT),
day = CAST(r.filepath(6) AS TINYINT),
hour = CAST(r.filepath(7) AS TINYINT),
minute = CAST(r.filepath(8) AS TINYINT),
details.queryType,
durationS = CAST(details.durationMs / 1000. AS NUMERIC(8,1)),
dataProcessedMB = CAST(details.dataProcessedBytes /1024./1024 AS NUMERIC(16,1)),
details.distributedStatementId,
details.queryText,
details.startTime,
details.endTime,
details.resultType,
--details.queryHash,
details.operationName,
details.endpoint,
details.resourceId,
details.error
FROM
OPENROWSET(
BULK 'resourceId=/SUBSCRIPTIONS/*/RESOURCEGROUPS/*/PROVIDERS/MICROSOFT.SYNAPSE/WORKSPACES/*/y=*/m=*/d=*/h=*/m=*/*.json',
DATA_SOURCE = 'Diagnostics',
FORMAT = 'CSV',
FIELDQUOTE = '0x0b',
FIELDTERMINATOR ='0x0b'
)
WITH (
jsonContent varchar(MAX)
) AS r CROSS APPLY OPENJSON(jsonContent)
WITH ( endpoint varchar(128) '$.LogicalServerName',
resourceGroup varchar(128) '$.ResourceGroup',
startTime datetime2 '$.properties.startTime',
endTime datetime2 '$.properties.endTime',
dataProcessedBytes bigint '$.properties.dataProcessedBytes',
durationMs bigint,
loginName varchar(128) '$.identity.loginName',
distributedStatementId varchar(128) '$.properties.distributedStatementId',
resultType varchar(128) ,
queryText varchar(max) '$.properties.queryText',
queryHash varchar(128) '$.properties.queryHash',
operationName varchar(128),
error varchar(128) '$.properties.error',
queryType varchar(128) '$.properties.command',
resourceId varchar(1024) '$.resourceId'
) as details";
EXEC(@tsql);
END
GO