-
Notifications
You must be signed in to change notification settings - Fork 0
/
qdsm.py
464 lines (387 loc) · 14 KB
/
qdsm.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
# -*- coding:utf-8 -*-
#Module metadata : authorship, version, targeted python version and license
INFOS = {
    'author': 'domlysz',
    'author_contact': '[email protected]',
    'organization': 'Entente Causses Cévennes',
    'org_contact': '[email protected]',
    'version': '0.1',
    'date': 'april 2017',
    'python': '3.5',
    'ext_deps': [], #no external dependencies
    'license': 'GPL3',
}
import os, sys
from xml.etree import ElementTree as etree
#List of file extensions used to decide which datasource paths can be edited.
#It is a safeguard to avoid breaking non-file paths like w*s services or remote database urls.
VALID_EXT = [
    #vector
    '.shp', '.tab', '.dxf',
    '.osm', '.kml', '.gml', '.gpx', '.geojson',
    #raster
    '.tif', '.tiff', '.jpg', '.jpeg', '.png', '.jp2', '.ecw', '.vrt',
    #file based databases
    '.gpkg', '.sqlite', '.db', '.mbtiles',
    #tables
    '.csv', '.txt', '.xls', '.xlsx', '.ods',
    #documents
    '.pdf', '.svg',
]
#also accept the upper case spelling of each extension
#(the comprehension is fully evaluated before the list is extended)
VALID_EXT.extend([suffix.upper() for suffix in VALID_EXT])
class QgsProjects():
    '''
    Represent a container of qgis projects

    The container holds one QgsProject per *.qgs file found while walking
    the submitted folders. Iterating over it yields these QgsProject objects,
    len() gives their count and repr() lists their paths (one per line).
    '''

    def __init__(self, folders):
        '''
        folders : iterable of root directories that will be walked recursively

        Projects are only registered here, call read() to actually parse them.
        '''
        self.folders = folders
        self.projects = []
        #list all qgs files contained in the submitted folders list
        for rootdir in self.folders:
            for root, subFolders, files in os.walk(rootdir):
                for name in files:
                    if not name.endswith('.qgs'):
                        continue
                    projectPath = os.path.join(root, name)
                    projectPath = projectPath.replace('\\', '/') #backslash to slash
                    self.projects.append(QgsProject(projectPath))

    def __iter__(self):
        return iter(self.projects)

    def __repr__(self):
        return '\n'.join([prj.path for prj in self.projects])

    def __len__(self):
        return len(self.projects)

    def pProgress(self, i, n, percent=False):
        '''
        progress bar
        i : zero based index of the current step
        n : total number of steps
        percent : print a percentage instead of the 'step/total' form
        '''
        #Writing '\r' will move the cursor back to the beginning of the line
        #specify the end character to avoid default newline of print function
        if percent:
            #integer division keeps the display short (33% and not 33.333333333333336%)
            print('\rProgress : ' + str(((i+1)*100)//n), end='%')
        else:
            print('\rProgress : ' + str(i+1)+'/'+str(n), end='')
        if i+1 == n: print('') #will print a newline

    def read(self, verbose=False):
        '''parse all qgs projects'''
        n = len(self.projects)
        for i, qgs in enumerate(self.projects):
            if not verbose:
                self.pProgress(i, n)
            qgs.read(verbose=verbose)

    def toAbs(self, verbose=False):
        '''convert all project's datasource paths to absolute'''
        n = len(self.projects)
        #enumerate provides the step index needed by the progress bar
        for i, qgs in enumerate(self.projects):
            if not verbose:
                self.pProgress(i, n)
            qgs.toAbs(verbose=verbose)

    def toRel(self, verbose=False):
        '''convert all project's datasource paths to relative'''
        n = len(self.projects)
        for i, qgs in enumerate(self.projects):
            if not verbose:
                self.pProgress(i, n)
            qgs.toRel(verbose=verbose)

    def swap(self, swapFile, sep=';', writeRelPath=False, verbose=False):
        '''
        process all project's datasource paths and update location if needed
        swapFile : path of the text file listing the 'source<sep>destination' pairs
        sep : separator used in the swap file
        writeRelPath : forwarded to each project's swap()
        '''
        n = len(self.projects)
        for i, qgs in enumerate(self.projects):
            if not verbose:
                self.pProgress(i, n)
            qgs.swap(swapFile, sep, writeRelPath, verbose=verbose)

    @property
    def parsed(self):
        '''True when every project of the container is flagged as parsed'''
        return all(prj.parsed for prj in self.projects)

    def dump(self, output, sep='\t'):
        '''
        write one line per project datasource in a text file
        output : path of the file to write (utf-8)
        sep : columns separator
        '''
        with open(output, 'w', encoding='utf-8') as f:
            #Select attributes of the project and of the datasource that will be dumped
            #note that both lists contain a 'path' column
            prjAttr = ['path', 'absolutePath']
            srcAttr = ['composer', 'path', 'subset', 'exists', 'dtype', 'provider', 'layer']
            #write columns header
            f.write(sep.join(prjAttr+srcAttr) + '\n')
            #iterate and write, missing attributes are dumped as empty strings
            for prj in self.projects:
                prjProps = [str(getattr(prj, attr, '')) for attr in prjAttr]
                for src in prj:
                    srcProps = [str(getattr(src, attr, '')) for attr in srcAttr]
                    f.write(sep.join(prjProps+srcProps) + '\n')

    @property
    def nSource(self):
        '''total number of sources, a source used by several projects is counted each time'''
        return sum(1 for prj in self.projects for src in prj)

    @property
    def nLayerSource(self):
        '''number of sources that are map layers'''
        return sum(1 for prj in self.projects for src in prj if src.composer == False)

    @property
    def nComposerSource(self):
        '''number of sources that are composer ressources'''
        return sum(1 for prj in self.projects for src in prj if src.composer == True)

    @property
    def nBrokenSource(self):
        '''number of gdal/ogr sources with a valid extension whose file does not exist'''
        return sum(1 for prj in self.projects for src in prj
            if src.exists == False
            and getattr(src, 'provider', None) in ['gdal', 'ogr']
            and os.path.splitext(src.path)[1] in VALID_EXT)

    def getUniqueSources(self):
        '''return unique sources'''
        return QgsSources(self)

    def findSource(self, path):
        '''return paths of qgs projects that contains the input source path'''
        path = path.replace('\\', '/')
        return [prj.path for prj in self.projects
            if any(src.path == path for src in prj.sources)]
class QgsSources():
    '''
    A container of unique source list

    Sources are considered identical when they share the same path, only
    the first one encountered is kept.
    '''

    def __init__(self, projects):
        '''
        extract unique source list from a QgsProjects object
        projects : iterable of objects exposing a 'sources' list
        '''
        self.sources = []
        #remember the paths already registered for a constant time lookup
        seen = set()
        for prj in projects:
            for src in prj.sources:
                if src.path not in seen:
                    seen.add(src.path)
                    self.sources.append(src)

    def __iter__(self):
        return iter(self.sources)

    def __repr__(self):
        return '\n'.join([src.path for src in self.sources])

    def __len__(self):
        return len(self.sources)

    def sort(self):
        '''sort the sources in place, ordering is delegated to the sources comparison operators'''
        self.sources.sort()

    def dump(self, output, sep='\t', filtr=None):
        '''
        write one line per source in a text file
        output : path of the file to write (utf-8)
        sep : columns separator
        filtr : optional dict of {attribute name : expected value}, a source is
            written if at least one of its attributes matches. An empty or
            missing filter writes every source.
        '''
        with open(output, 'w', encoding='utf-8') as f:
            srcAttr = ['composer', 'path', 'exists', 'dtype', 'provider']
            #write columns header
            f.write(sep.join(srcAttr) + '\n')
            for src in self.sources:
                if filtr and not any(getattr(src, k, None) == v for k, v in filtr.items()):
                    continue
                #missing attributes are dumped as empty strings
                srcProps = [str(getattr(src, attr, '')) for attr in srcAttr]
                f.write(sep.join(srcProps) + '\n')

    def getExtList(self):
        '''return a dict of founded extension as keys and the number of occurence as values
        only extensions listed in VALID_EXT are counted'''
        d = {}
        for src in self.sources:
            ext = os.path.splitext(src.path)[1]
            if ext in VALID_EXT:
                d[ext] = d.get(ext, 0) + 1
        return d

    def getProvList(self):
        '''return a dict of founded provider as keys and the number of occurence as values
        composer ressources are ignored'''
        d = {}
        for src in self.sources:
            if src.composer == False:
                d[src.provider] = d.get(src.provider, 0) + 1
        return d

    @property
    def nLayerSource(self):
        '''number of sources that are map layers'''
        return sum(1 for src in self.sources if src.composer == False)

    @property
    def nComposerSource(self):
        '''number of sources that are composer ressources'''
        return sum(1 for src in self.sources if src.composer == True)

    @property
    def nBrokenSource(self):
        '''number of gdal/ogr sources with a valid extension whose file does not exist'''
        return sum(1 for src in self.sources
            if src.exists == False
            and getattr(src, 'provider', None) in ['gdal', 'ogr']
            and os.path.splitext(src.path)[1] in VALID_EXT)
class QgsProject():
    '''
    Represent a qgis project as datasources container

    Attributes:
        path : path of the qgs file
        sources : list of QgsSource objects, filled when the file is parsed
        absolutePath : value of the project 'properties/Paths/Absolute' setting,
            None until the file has been parsed
        parsed : True once the file has been successfully parsed
    '''

    def __init__(self, path):
        self.path = path
        self.sources = []
        self.absolutePath = None
        self.parsed = False

    def __iter__(self):
        return iter(self.sources)

    def __repr__(self):
        return self.path + '\n\t' + '\n\t'.join([src.path for src in self.sources])

    def __len__(self):
        return len(self.sources)

    #####################

    def _backslash2slash(self, path):
        '''convert backslash to slash to make the path usuable both on dos and unix'''
        return path.replace('\\', '/')

    def _isAbsPath(self, path):
        '''
        return True if the path starts with a slash, a backslash or a dos drive letter ('C:')
        paths starting with a dot, empty paths and one character paths are relative
        '''
        if path.startswith('.'):
            return False
        #slicing instead of indexing so that a too short path does not raise an IndexError
        return path.startswith(('/', '\\')) or path[1:2] == ':'

    def _pathToAbs(self, path):
        '''Normalize a source path as absolute link
        a relative path is resolved against the folder of the qgs file,
        the returned path always uses slashes'''
        #relative path to absolute
        if not self._isAbsPath(path):
            #join (and not a plain concatenation) so that a project path without
            #folder part does not resolve the source against the filesystem root
            path = os.path.normpath(os.path.join(os.path.dirname(self.path), path))
        return self._backslash2slash(path)

    def _pathToRel(self, path):
        '''absolute path to relative path
        the path is expressed relatively to the folder of the qgs file,
        it is returned unchanged (but slashed) if it can't be made relative'''
        if self._isAbsPath(path):
            try:
                path = os.path.relpath(path, os.path.dirname(self.path))
            except ValueError:
                #happen on dos when the 2 submited path do not share the same drive letter
                pass
            else:
                #Force explicit ref to current folder if needed
                if not path.startswith('.'):
                    path = './' + path
        return self._backslash2slash(path)

    def _pathSwap(self, path, swapDict):
        '''swap a path following a reference dictionnary
        input path must be an absolute path
        output path will be an absolute path
        the path is returned unchanged (but slashed) if it is not a key of the dictionnary'''
        path = swapDict.get(self._backslash2slash(path), path)
        return self._backslash2slash(path)

    #####################

    def read(self, verbose=False):
        '''parse the xml file'''
        self._parse(verbose=verbose)

    def toAbs(self, verbose=False):
        '''convert all source paths to absolute'''
        self._parse(write=True, verbose=verbose)

    def toRel(self, verbose=False):
        '''convert all source paths to relative'''
        self._parse(write=True, writeRelPath=True, verbose=verbose)

    def swap(self, swapFile, sep=';', writeRelPath=False, verbose=False):
        '''
        process all source paths and update location if needed
        swapFile : utf-8 text file with one 'source<sep>destination' pair per line,
            empty lines are ignored
        sep : separator used in the swap file
        writeRelPath : save the project with relative paths instead of absolute ones
        Raises ValueError if a line does not contain exactly two fields
        '''
        #Extract swap file to dictionnary
        swapDict = {}
        with open(swapFile, 'r', encoding='utf-8') as f:
            for lineNum, line in enumerate(f, start=1):
                line = line.strip()
                if line == '':
                    continue
                fields = line.split(sep)
                if len(fields) != 2:
                    raise ValueError('{} line {} : expected 2 fields separated by "{}", found {}'.format(
                        swapFile, lineNum, sep, len(fields)))
                src, dst = fields
                #lookups are done with slashed paths, so keys must be slashed too
                swapDict[self._backslash2slash(src)] = dst
        self._parse(write=True, writeRelPath=writeRelPath, swapDict=swapDict, verbose=verbose)

    #####################

    def _editPath(self, src, swapDict, writeRelPath, verbose):
        '''return the path to write in the xml for a source, the QgsSource path is updated if swapped'''
        srcPath = src.path
        if swapDict:
            srcPath = self._pathSwap(srcPath, swapDict)
            if srcPath != src.path:
                src.path = srcPath #update QgsSource
                if verbose: print(' <-> Swap to ' + srcPath)
        if writeRelPath:
            srcPath = self._pathToRel(srcPath)
        return srcPath

    def _parse(self, write=False, writeRelPath=False, swapDict=None, verbose=False):
        '''
        Parse a qgs xml file to build a list of datasource (QgsSource class)
        extracted paths are always converted to absolute and slashed paths
        if 'write' is True then the xml tree will be saved with these absolute paths
        unless 'writeRelPath' is True, in this case paths will be converted to relative beforehand
        If a swap dictionnary is passed then it will be used to update paths location

        The sources list is rebuilt at each call. If the file is not a valid xml
        the function returns without touching the object nor the file.
        Raises AttributeError if the project has no 'properties/Paths/Absolute' node.
        '''
        if verbose:
            print('Parsing ' + self.path)
        #keep the file opened only while reading, it may be overwritten below
        with open(self.path, 'r', encoding='utf-8') as f:
            try:
                tree = etree.parse(f)
            except (etree.ParseError, UnicodeError):
                if verbose:
                    print(' * File cannot be parsed')
                return
        rootTree = tree.getroot()
        #start from a clean list so that successive calls do not duplicate the sources
        self.sources = []

        #Get project path mode property
        absNode = rootTree.find('.//properties/Paths/Absolute')
        self.absolutePath = (absNode.text == 'true')

        ##################
        #Get composer ressources (image, svg...)
        if verbose: print(' > Composer ressources')
        for elem in rootTree.findall('.//Composer/Composition/ComposerPicture'):
            rawPath = elem.get('file')
            if not rawPath:
                #picture without file, nothing to register nor to rewrite
                continue
            #Append in sources list as QgsSource object, with a normalized path
            src = QgsSource(self._pathToAbs(rawPath), composer=True)
            if verbose: print(' - ' + src.path)
            self.sources.append(src)
            #xml edits
            if write:
                elem.set('file', self._editPath(src, swapDict, writeRelPath, verbose))

        ##################
        #Get layers datasources
        if verbose: print(' > Map layer datasource')
        for elem in rootTree.findall('.//projectlayers/maplayer'):
            #Get provider
            provNode = elem.find('provider')
            if provNode is None:
                if verbose: print('pass layer node with no provider')
                continue
            #Get datasource url
            srcNode = elem.find('datasource')
            if srcNode is None:
                #only this layer is skipped, the others must still be processed
                if verbose: print('pass layer node with no datasource')
                continue
            srcPath = srcNode.text or '' #an empty node has no text
            #Get some others properties
            nameNode = elem.find('layername')
            srcProps = {
                'composer': False,
                'provider': provNode.text,
                'dtype': elem.get('type'),
                'layer': nameNode.text if nameNode is not None else None,
                'subset': '', #ogr filter
            }
            #for now only edit datasource using gdal/ogr provider
            #TODO handle other file based provider like
            # >delimitedtext: file:\Z:\data\file.csv?type=csv&geomType=none
            # >spatialite : dbname='/home/data/file.sqlite' table="tst" (geometry) sql=
            # >gpx, virtual...
            # for each provider we must be able to parse the datasource path and then reconstruct it if needed
            editable = False
            if srcProps['provider'] in ['ogr', 'gdal']:
                #we want to process only files based path not db or wms url
                #but we can't just test if file exists because the path link can be outdated
                #so just check if its a valid extension
                if '|' in srcPath: #ogr filter in path
                    srcPath, srcProps['subset'] = srcPath.split('|', 1)
                if os.path.splitext(srcPath)[1] in VALID_EXT:
                    #get normalized path
                    srcPath = self._pathToAbs(srcPath)
                    editable = True
            #Append in sources list as QgsSource object (whatever the provider)
            if verbose: print(' - ' + srcPath)
            src = QgsSource(srcPath, **srcProps)
            self.sources.append(src)
            #xml edits
            if write and editable:
                srcPath = self._editPath(src, swapDict, writeRelPath, verbose)
                #edit tree, the ogr filter is put back behind the path
                if srcProps['subset'] != '':
                    srcPath = srcPath + '|' + srcProps['subset']
                srcNode.text = srcPath

        self.parsed = True

        ##################
        #Write xml file
        if write:
            #update project path mode setting and QgsProject attribute
            if writeRelPath:
                if verbose: print('>> Saving with relative paths')
                absNode.text = 'false'
            else:
                if verbose: print('>> Saving with absolute paths')
                absNode.text = 'true'
            self.absolutePath = not writeRelPath
            #save xml
            #NOTE(review): ElementTree default parser does not keep xml comments nor the
            #DOCTYPE declaration, so they are presumably lost in the saved file - to confirm
            tree.write(self.path)
        if verbose: print('\n')
class QgsSource():
    '''
    A datasource referenced by a qgis project.
    The path is mandatory, every other property is optional and submitted as keyword argument.

    Attributes model:
        path : absolute path of the datasource
        composer : bool, flag if the source is a composer ressource or not
        subset : the filter apply on the datasource at qgis ogr provider level
        dtype : vector, raster, database, w*s ...
        provider : qgis provider name [gdal, ogr, delimitedtext, postgres, spatialite, virtual, memory, grass, gpx, wfs, wms...]
        layer : the layer name in qgis toc
        exists : read only, True if the path exists on the file system
    '''

    def __init__(self, path, **kwargs):
        self.path = path
        #each keyword argument becomes an attribute of the instance
        for name in kwargs:
            setattr(self, name, kwargs[name])

    def __repr__(self):
        return self.path

    def __lt__(self, other):
        #case insensitive ordering on the path, used when sorting sources
        mine, theirs = self.path.lower(), other.path.lower()
        return mine < theirs

    @property
    def exists(self):
        #evaluated at each access, not cached
        return os.path.exists(self.path)