forked from include-what-you-use/include-what-you-use
-
Notifications
You must be signed in to change notification settings - Fork 0
/
fix_includes.py
executable file
·2467 lines (2085 loc) · 102 KB
/
fix_includes.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
#!/usr/bin/env python3
##===--- fix_includes.py - rewrite source files based on iwyu output ------===##
#
# The LLVM Compiler Infrastructure
#
# This file is distributed under the University of Illinois Open Source
# License. See LICENSE.TXT for details.
#
##===----------------------------------------------------------------------===##
"""Update files with the 'correct' #include and forward-declare lines.

Given the output of include_what_you_use on stdin -- when run at the
(default) --v=1 verbosity level or higher -- modify the files
mentioned in the output, removing their old #include lines and
replacing them with the lines given by the include_what_you_use
script.

This script runs in four stages. In the first, it groups physical
lines together to form 'move spans'. A 'move span' is the atomic unit
for moving or deleting code. A move span is either a) an #include
line, along with any comment lines immediately preceding it; b) a
forward-declare line -- or more if it's a multi-line forward declare
-- along with preceding comments; c) any other single line. Example:

   // I really am glad I'm forward-declaring this class!
   // If I didn't, I'd have to #include the entire world.
   template<typename A, typename B, typename C, typename D>
   class MyClass;

Then, it groups move spans together into 'reorder spans'. These are
spans of code that consist entirely of #includes and forward-declares,
maybe separated by blank lines and comments. We assume that we can
arbitrarily reorder #includes and forward-declares within a reorder
span, without affecting correctness. Things like #ifdefs, #defines,
namespace declarations, static variable declarations, class
definitions, etc -- just about anything -- break up reorder spans.

In stage 3 it deletes all #include and forward-declare lines that iwyu
says to delete. iwyu includes line numbers for deletion, making this
part easy. If this step results in "empty" #ifdefs or namespaces
(#ifdefs or namespaces with no code inside them), we delete those as
well. We recalculate the reorder spans, which may have gotten bigger
due to the deleted code.

In stage 4 it adds new iwyu-dictated #includes and forward-declares
after the last existing #includes and forward-declares. Then it
reorders the #includes and forward-declares to match the order
specified by iwyu. It follows iwyu's instructions as much as
possible, modulo the constraint that an #include or forward-declare
cannot leave its current reorder span.

All this moving messes up the blank lines, which we then need to fix
up. Then we're done!
"""

# The docstring has to be the first statement in the module to become
# __doc__; a __future__ import is still legal directly after it.
from __future__ import print_function
__author__ = '[email protected] (Craig Silverstein)'
import difflib
import argparse
import os
import re
import sys
from collections import OrderedDict
# Long-form help text describing what the script reads and modifies.
# NOTE(review): presumably handed to argparse as the parser epilog -- the
# code that consumes it is outside this view; confirm.
_EPILOG = """\
Reads the output from include-what-you-use on stdin -- run with --v=1 (default)
verbosity level or above -- and, unless --sort_only or --dry_run is specified,
modifies the files mentioned in the output, removing their old #include lines
and replacing them with the lines given by include-what-you-use. It also sorts
the #include and forward-declare lines.
All files mentioned in include-what-you-use output are modified, unless
filenames are specified on the commandline, in which case only those files are
modified.
The exit code is non-zero if a critical error occurs, otherwise zero.
"""

# Matches a '//' comment together with the whitespace in front of it, up to
# the end of the line. Used with .sub('', ...) to strip comments from lines
# of iwyu output.
_COMMENT_RE = re.compile(r'\s*//.*')

# These are the types of lines a file can have. These are matched
# using re.match(), so don't need a leading ^.
_C_COMMENT_START_RE = re.compile(r'\s*/\*')
# Group 1 captures whatever follows the closing '*/' on the same line.
_C_COMMENT_END_RE = re.compile(r'.*\*/\s*(.*)$')
_COMMENT_LINE_RE = re.compile(r'\s*//')
_PRAGMA_ONCE_LINE_RE = re.compile(r'\s*#\s*pragma\s+once')
# Any #pragma line that contains 'push' (respectively 'pop') anywhere.
_PRAGMA_PUSH_LINE_RE = re.compile(r'\s*#\s*pragma.*push.*')
_PRAGMA_POP_LINE_RE = re.compile(r'\s*#\s*pragma.*pop.*')
_BLANK_LINE_RE = re.compile(r'\s*$')
# No trailing \b here, so this is a prefix match on '#if'.
_IF_RE = re.compile(r'\s*#\s*if')  # matches #if/#ifdef/#ifndef
_ELSE_RE = re.compile(r'\s*#\s*(else|elif)\b')  # matches #else/#elif
_ENDIF_RE = re.compile(r'\s*#\s*endif\b')
# This is used to delete 'empty' namespaces after fwd-decls are removed.
# Some third-party libraries use macros to start/end namespaces.
_NAMESPACE_START_RE = re.compile(r'\s*(namespace\b[^{]*{\s*)+(//.*)?$|'
                                 r'\s*(U_NAMESPACE_BEGIN)|'
                                 r'\s*(HASH_NAMESPACE_DECLARATION_START)')
# Also detect Allman and mixed style namespaces. Use a continue regex for
# validation and to correctly set the line info.
_NAMESPACE_START_ALLMAN_RE = re.compile(r'\s*(namespace\b[^{=]*)+(//.*)?$')
_NAMESPACE_START_MIXED_RE = re.compile(
    r'\s*(namespace\b[^{]*{\s*)+(namespace\b[^{]*)+(//.*)?$')
# A line holding only '{' (plus optional whitespace and a '//' comment).
_NAMESPACE_CONTINUE_ALLMAN_MIXED_RE = re.compile(r'\s*{\s*(//.*)?$')
_NAMESPACE_END_RE = re.compile(r'\s*(})|'
                               r'\s*(U_NAMESPACE_END)|'
                               r'\s*(HASH_NAMESPACE_DECLARATION_END)')
# The group (in parens) holds the unique 'key' identifying this #include.
_INCLUDE_RE = re.compile(r'\s*#\s*include\s+([<"][^">]+[>"])')
# We don't need this to actually match forward-declare lines (we get
# that information from the iwyu input), but we do need an RE here to
# serve as an index to _LINE_TYPES. So we use an RE that never matches.
_FORWARD_DECLARE_RE = re.compile(r'$.FORWARD_DECLARE_RE')
# Likewise, used to mark an '#ifdef' line of a header guard, or other
# #ifdef that covers an entire file.
_HEADER_GUARD_RE = re.compile(r'$.HEADER_GUARD_RE')
# Marks the '#define' line that comes after a header guard. Since we
# know the previous line was a header-guard line, we're not that picky
# about this one.
_HEADER_GUARD_DEFINE_RE = re.compile(r'\s*#\s*define\s+')
# Pragma to mark the associated header (for use when it cannot be deduced from
# the filename)
_IWYU_PRAGMA_ASSOCIATED_RE = re.compile(r'IWYU\s*pragma:\s*associated')
# We annotate every line in the source file by the re it matches, or None.
# Note that not all of the above RE's are represented here; for instance,
# we fold _C_COMMENT_START_RE and _C_COMMENT_END_RE into _COMMENT_LINE_RE.
# The _NAMESPACE_CONTINUE_ALLMAN_MIXED_RE is also set on lines when Allman
# and mixed namespaces are detected but the RE is too easy to match to add
# under normal circumstances (must always be preceded by Allman/mixed).
_LINE_TYPES = [_COMMENT_LINE_RE, _BLANK_LINE_RE,
               _NAMESPACE_START_RE, _NAMESPACE_START_ALLMAN_RE,
               _NAMESPACE_START_MIXED_RE, _NAMESPACE_END_RE,
               _IF_RE, _ELSE_RE, _ENDIF_RE,
               _INCLUDE_RE, _FORWARD_DECLARE_RE,
               _HEADER_GUARD_RE, _HEADER_GUARD_DEFINE_RE,
               _PRAGMA_ONCE_LINE_RE,
               _PRAGMA_PUSH_LINE_RE, _PRAGMA_POP_LINE_RE,
              ]
# A regexp matching #include lines that should be a barrier for
# sorting -- that is, we should never reorganize the code so an
# #include that used to come before this line now comes after, or vice
# versa. This can be used for 'fragile' #includes that require other
# #includes to happen before them to function properly.
# (Note that the barrier has no effect on where new #includes are
# added; it just affects the reordering of existing #includes.)
_BARRIER_INCLUDES = re.compile(r'^\s*#\s*include\s+(<linux/)')
# A list of all known extensions for C++ source files, used to
# guess if a filename is a source file or a header.
# Please keep this in sync with source_extensions in iwyu_path_util.cc.
_SOURCE_EXTENSIONS = [".c", ".C", ".cc", ".CC", ".cxx", ".CXX",
                      ".cpp", ".CPP", ".c++", ".C++", ".cp"]
# Python 2 iterators spell the advance method .next() instead of
# __next__(); on those interpreters supply a module-level next() that
# forwards to it so the rest of the file can use the Python 3 spelling.
if sys.version_info < (3,):
  def next(i):
    return i.next()
class OrderedSet(object):
  """A (naive) set that iterates in insertion order.

  Sometimes sets affect order of outputs, which hinders testing. This
  implementation keeps its members as the keys of an OrderedDict, so
  iteration always follows the order in which values were first added.
  """

  def __init__(self, iterable=None):
    """Creates the set, seeded from iterable (None means empty)."""
    self.storage = OrderedDict(
        (a, None) for a in (() if iterable is None else iterable))

  def add(self, value):
    """Adds value; a value already present keeps its original position."""
    self.storage[value] = None

  def intersection_update(self, other):
    """Keeps only the members also found in other, preserving our order.

    Arguments:
      other: an OrderedSet or any iterable of hashable values.
    """
    if isinstance(other, OrderedSet):
      keep = other.storage
    else:
      # Materialize once so one-shot iterables work and lookups are O(1).
      keep = set(other)
    self.storage = OrderedDict(
        (k, None) for k in self.storage if k in keep)

  def update(self, other):
    """Appends every value of other that is not already a member.

    Arguments:
      other: an OrderedSet or any iterable of hashable values.
    """
    for value in other:
      self.storage[value] = None

  def difference(self, other):
    """Returns a new OrderedSet of our members that are not in other."""
    return OrderedSet(v for v in self if v not in other)

  def __iter__(self):
    return iter(self.storage)

  def __contains__(self, value):
    return value in self.storage

  def __len__(self):
    return len(self.storage)

  def __repr__(self):
    # Without this, debug output shows an opaque '<... object at 0x...>'.
    return '%s(%r)' % (type(self).__name__, list(self.storage))
def _MayBeHeaderFile(filename):
  """Guesses whether filename is a C++ header file. When unsure, says yes."""
  # Headers come with many suffixes (.h, .hpp, .hxx) or with none at all,
  # so instead of listing them we only rule out the known source suffixes.
  _, suffix = os.path.splitext(filename)
  is_known_source = suffix in _SOURCE_EXTENSIONS
  return not is_known_source
class FixIncludesError(Exception):
  """Raised for malformed iwyu output, or output that disagrees with a file."""
class IWYUOutputRecord(object):
  """Information that the iwyu output file has about one source file."""

  def __init__(self, filename):
    """Creates an empty record for filename; the parser fills in the rest."""
    self.filename = filename

    # A set of integers.
    self.lines_to_delete = set()

    # A set of integer line-numbers, for each #include iwyu saw that
    # is marked with a line number. This is usually not an exhaustive
    # list of include-lines, but that's ok because we only use this
    # data structure for sanity checking: we double-check with our own
    # analysis that these lines are all #include lines. If not, we
    # know the iwyu data is likely out of date, and we complain. So
    # more data here is always welcome, but not essential.
    self.some_include_lines = set()

    # A set of integer line-number spans [start_line, end_line), for
    # each forward-declare iwyu saw. iwyu reports line numbers for
    # every forward-declare it sees in the source code. (It won't
    # report, though, forward-declares inside '#if 0' or similar.)
    self.seen_forward_declare_lines = set()

    # Those spans which pertain to nested forward declarations (i.e. of nested
    # classes). This set should be a subset of self.seen_forward_declare_lines.
    self.nested_forward_declare_lines = set()

    # A set of each line in the iwyu 'add' section.
    self.includes_and_forward_declares_to_add = OrderedSet()

    # A map from the include filename (including ""s or <>s) to the
    # full line as given by iwyu, which includes comments that iwyu
    # has put next to the #include. This holds both 'to-add' and
    # 'to-keep' #includes. If flags.comments is False, the comments
    # are removed before adding to this list.
    self.full_include_lines = OrderedDict()

  def Merge(self, other):
    """Merges other with this one. They must share a filename.

    This function is intended to be used when we see two iwyu records
    in the input, both for the same file. We can merge the two together.
    We are conservative: we union the lines to add, and intersect the
    lines to delete.

    Arguments:
      other: an IWYUOutputRecord to merge into this one.
        It must have the same value for filename that self does.
    """
    assert self.filename == other.filename, "Can't merge distinct files"
    # Only delete a line when both records agree it should go...
    self.lines_to_delete.intersection_update(other.lines_to_delete)
    # ...but take everything else from either record.
    self.some_include_lines.update(other.some_include_lines)
    self.seen_forward_declare_lines.update(other.seen_forward_declare_lines)
    self.nested_forward_declare_lines.update(other.nested_forward_declare_lines)
    self.includes_and_forward_declares_to_add.update(
        other.includes_and_forward_declares_to_add)
    self.full_include_lines.update(other.full_include_lines)

  def HasContentfulChanges(self):
    """Returns True iff this record has at least one add or delete."""
    # bool() so callers get the documented True/False rather than whichever
    # collection happened to decide the 'or'.
    return bool(self.includes_and_forward_declares_to_add or
                self.lines_to_delete)

  def __str__(self):
    return ('--- iwyu record ---\n FILENAME: %s\n LINES TO DELETE: %s\n'
            ' (SOME) INCLUDE LINES: %s\n (SOME) FWD-DECL LINES: %s\n'
            ' TO ADD: %s\n ALL INCLUDES: %s\n---\n'
            % (self.filename, self.lines_to_delete,
               self.some_include_lines, self.seen_forward_declare_lines,
               self.includes_and_forward_declares_to_add,
               self.full_include_lines))
class IWYUOutputParser(object):
  """Parses the lines in iwyu output corresponding to one source file.

  Lines are fed one at a time through a small state machine
  (_EXPECTED_NEXT_RE) that enforces the section order
  add -> remove -> total -> end, or a single 'no edits' line. The
  non-header lines of each section are collected in self.lines_by_section,
  and ParseOneRecord() turns them into an IWYUOutputRecord.
  """

  # iwyu adds this comment to some lines to map them to the source file.
  # Groups 1 and 2 are the first and last line number.
  _LINE_NUMBERS_COMMENT_RE = re.compile(r'\s*// lines ([0-9]+)-([0-9]+)')

  # The output of include-what-you-use has sections that indicate what
  # #includes and forward-declares should be added to the output file,
  # what should be removed, and what the end result is. The first line
  # of each section also has the filename.
  _ADD_SECTION_RE = re.compile(r'^(.*) should add these lines:$')
  _REMOVE_SECTION_RE = re.compile(r'^(.*) should remove these lines:$')
  _TOTAL_SECTION_RE = re.compile(r'^The full include-list for (.*):$')
  _SECTION_END_RE = re.compile(r'^---$')

  # Alternately, if a file does not need any iwyu modifications (though
  # it still may need its #includes sorted), iwyu will emit this:
  _NO_EDITS_RE = re.compile(r'^\((.*) has correct #includes/fwd-decls\)$')

  # Human-readable section names, used in error messages.
  _RE_TO_NAME = {_ADD_SECTION_RE: 'add',
                 _REMOVE_SECTION_RE: 'remove',
                 _TOTAL_SECTION_RE: 'total',
                 _SECTION_END_RE: 'end',
                 _NO_EDITS_RE: 'no_edits',
                }

  # A small state-transition machine. key==None indicates the start
  # state. value==None means that the key is an end state (that is,
  # its presence indicates the record is finished).
  _EXPECTED_NEXT_RE = {
    None: frozenset([_ADD_SECTION_RE, _NO_EDITS_RE]),
    _ADD_SECTION_RE: frozenset([_REMOVE_SECTION_RE]),
    _REMOVE_SECTION_RE: frozenset([_TOTAL_SECTION_RE]),
    _TOTAL_SECTION_RE: frozenset([_SECTION_END_RE]),
    _SECTION_END_RE: None,
    _NO_EDITS_RE: None,
  }

  def __init__(self):
    """Starts in the initial state, with no filename and no lines seen."""
    # This is set to one of the 'section' REs above. None is the start-state.
    self.current_section = None
    self.filename = '<unknown file>'
    self.lines_by_section = {}  # key is an RE, value is a list of lines

  def _ProcessOneLine(self, line, basedir=None):
    """Reads one line of input, updates self, and returns False at EORecord.

    If the line matches one of the hard-coded section names, updates
    self.filename and self.current_section. Otherwise, the line is
    taken to be a member of the currently active section, and is added
    to self.lines_by_section.

    Arguments:
      line: one line from the iwyu input file.
      basedir: passed to NormalizeFilePath() together with the filename
        found on a section-header line.

    Returns:
      False if the line finishes the record (the '---' end marker, or a
      'has correct #includes/fwd-decls' line), True otherwise.

    Raises:
      FixIncludesError: if there is an out-of-order section or
        mismatched filename.
    """
    line = line.rstrip()  # don't worry about line endings
    if not line:  # just ignore blank lines
      return True

    for (section_re, section_name) in self._RE_TO_NAME.items():
      m = section_re.search(line)
      if m:
        # Check or set the filename (if the re has a group, it's for filename).
        if section_re.groups >= 1:
          this_filename = NormalizeFilePath(basedir, m.group(1))

          # Once a record has started, every header must name the same file.
          if (self.current_section is not None and
              this_filename != self.filename):
            raise FixIncludesError('"%s" section for %s comes after "%s" for %s'
                                   % (section_name, this_filename,
                                      self._RE_TO_NAME[self.current_section],
                                      self.filename))
          self.filename = this_filename

        # Check and set the new section we're entering.
        if section_re not in self._EXPECTED_NEXT_RE[self.current_section]:
          if self.current_section is None:
            raise FixIncludesError('%s: "%s" section unexpectedly comes first'
                                   % (self.filename, section_name))
          else:
            raise FixIncludesError('%s: "%s" section unexpectedly follows "%s"'
                                   % (self.filename, section_name,
                                      self._RE_TO_NAME[self.current_section]))
        self.current_section = section_re
        # We're done parsing this record if this section has nothing after it.
        return self._EXPECTED_NEXT_RE[self.current_section] is not None

    # We're not starting a new section, so just add to the current section.
    # We ignore lines before section-start, they're probably things like
    # compiler messages ("Compiling file foo").
    if self.current_section is not None:
      self.lines_by_section.setdefault(self.current_section, []).append(line)
    return True

  def ParseOneRecord(self, iwyu_output, flags):
    """Given a file object with output from an iwyu run, return per file info.

    For each source file that iwyu_output mentions (because iwyu was run on
    it), we return a structure holding the information in IWYUOutputRecord:
    1) What file these changes apply to
    2) What line numbers hold includes/fwd-declares to remove
    3) What includes/fwd-declares to add
    4) Ordering information for includes and fwd-declares

    Arguments:
      iwyu_output: a File object returning lines from an iwyu run
      flags: commandline flags, as parsed by argparse. We use
         flags.comments, which controls whether we output comments
         generated by iwyu, and flags.basedir, which is forwarded to
         _ProcessOneLine().
    Returns:
       An IWYUOutputRecord object, or None at EOF.

    Raises:
       FixIncludesError: for malformed-looking lines in the iwyu output.
    """
    for line in iwyu_output:
      if not self._ProcessOneLine(line, flags.basedir):
        # returns False at end-of-record
        break
    else:  # for/else
      # Input ran out before a record was completed.
      return None  # at EOF

    # Now set up all the fields in an IWYUOutputRecord.

    # IWYUOutputRecord.filename
    retval = IWYUOutputRecord(self.filename)

    # IWYUOutputRecord.lines_to_delete
    for line in self.lines_by_section.get(self._REMOVE_SECTION_RE, []):
      m = self._LINE_NUMBERS_COMMENT_RE.search(line)
      if not m:
        raise FixIncludesError('line "%s" (for %s) has no line number'
                               % (line, self.filename))
      # The RE is of the form [start_line, end_line], inclusive.
      for line_number in range(int(m.group(1)), int(m.group(2)) + 1):
        retval.lines_to_delete.add(line_number)

    # IWYUOutputRecord.some_include_lines
    # NOTE(review): 'remove' lines still carry their leading '- ' here (it is
    # only stripped in the forward-declare loop below), so _INCLUDE_RE.match()
    # looks like it can only succeed for 'total' lines -- confirm whether
    # that is intended.
    for line in (self.lines_by_section.get(self._REMOVE_SECTION_RE, []) +
                 self.lines_by_section.get(self._TOTAL_SECTION_RE, [])):
      if not _INCLUDE_RE.match(line):
        continue
      m = self._LINE_NUMBERS_COMMENT_RE.search(line)
      if not m:
        continue  # not all #include lines have line numbers, but some do
      for line_number in range(int(m.group(1)), int(m.group(2)) + 1):
        retval.some_include_lines.add(line_number)

    # IWYUOutputRecord.seen_forward_declare_lines
    for line in (self.lines_by_section.get(self._REMOVE_SECTION_RE, []) +
                 self.lines_by_section.get(self._TOTAL_SECTION_RE, [])):
      # Everything that's not an #include is a forward-declare.
      if line.startswith('- '):  # the 'remove' lines all start with '- '.
        line = line[len('- '):]
      if _INCLUDE_RE.match(line):
        continue
      m = self._LINE_NUMBERS_COMMENT_RE.search(line)
      if m:
        # Stored half-open, [start_line, end_line), hence the +1.
        line_range = (int(m.group(1)), int(m.group(2))+1)
        retval.seen_forward_declare_lines.add(line_range)
        # A '::' anywhere in the line marks the declaration as nested.
        if '::' in line:
          retval.nested_forward_declare_lines.add(line_range)

    # IWYUOutputRecord.includes_and_forward_declares_to_add
    for line in self.lines_by_section.get(self._ADD_SECTION_RE, []):
      line = _COMMENT_RE.sub('', line)
      retval.includes_and_forward_declares_to_add.add(line)

    # IWYUOutputRecord.full_include_lines
    for line in self.lines_by_section.get(self._TOTAL_SECTION_RE, []):
      m = _INCLUDE_RE.match(line)
      if m:
        if not flags.comments:
          line = _COMMENT_RE.sub('', line)  # pretend there were no comments
        else:
          # Just remove '// line XX': that's iwyu metadata, not a real comment
          line = self._LINE_NUMBERS_COMMENT_RE.sub('', line)
        # Keyed by the include name with its quotes or angle brackets.
        retval.full_include_lines[m.group(1)] = line

    return retval
class LineInfo(object):
  """Information about a single line of a source file."""

  def __init__(self, line):
    """Initializes the content of the line, but no ancillary fields."""
    # The content of the line in the input file
    self.line = line

    # The 'type' of the line. The 'type' is one of the regular
    # expression objects in _LINE_TYPES, or None for any line that
    # does not match any regular expression in _LINE_TYPES.
    self.type = None

    # True if no lines processed before this one have the same type
    # as this line.
    self.is_first_line_of_this_type = False

    # Set to true if we want to delete/ignore this line in the output
    # (for instance, because iwyu says to delete this line). At the
    # start, the only line to delete is the 'dummy' line 0.
    self.deleted = self.line is None

    # If this line is an #include or a forward-declare, gives a
    # [begin,end) pair saying the 'span' this line is part of. We do
    # this for two types of span: the move span (an #include or
    # forward declare, along with any preceding comments) and the
    # reorder span (a contiguous block of move-spans, connected only
    # by blank lines and comments). For lines that are not an
    # #include or forward-declare, these may have an arbitrary value.
    self.move_span = None
    self.reorder_span = None

    # If this line is an #include or a forward-declare, gives the
    # 'key' of the line. For #includes it is the filename included,
    # including the ""s or <>s. For a forward-declare it's the name
    # of the class/struct. For other types of lines, this is None.
    self.key = None

    # If this is a forward-declaration of a nested class, then this will be
    # True.
    self.is_nested_forward_declaration = False

  def __str__(self):
    """Returns a debug rendering: the line, then its type index and spans."""
    if self.deleted:
      line = 'XX-%s-XX' % self.line
    else:
      line = '>>>%s<<<' % self.line
    if self.type is None:
      type_id = None
    else:
      try:
        type_id = _LINE_TYPES.index(self.type)
      except ValueError:
        # Some types (e.g. _NAMESPACE_CONTINUE_ALLMAN_MIXED_RE) are assigned
        # to lines but deliberately kept out of _LINE_TYPES; show their
        # pattern rather than failing while building a debug string.
        type_id = repr(self.type.pattern)
    return ('%s\n -- type: %s (key: %s). move_span: %s. reorder_span: %s'
            % (line, type_id, self.key, self.move_span, self.reorder_span))
class FileInfo(object):
  """How a file is stored on disk: its line separator and its encoding."""
  DEFAULT_LINESEP = os.linesep
  DEFAULT_ENCODING = 'utf-8'

  def __init__(self, linesep, encoding):
    self.encoding = encoding
    self.linesep = linesep

  @staticmethod
  def parse(filename):
    """Reads filename as bytes and returns a FileInfo guessed from them."""
    with open(filename, 'rb') as stream:
      raw = stream.read()
    return FileInfo(FileInfo.guess_linesep(raw), FileInfo.guess_encoding(raw))

  @staticmethod
  def guess_linesep(bytebuf):
    """Returns whichever of CRLF and LF occurs more often in bytebuf.

    A tie (including a buffer with no line endings at all) yields
    DEFAULT_LINESEP.
    """
    crlf_count = bytebuf.count(b'\r\n')
    # Every CRLF also contains an LF; subtract to count the bare ones.
    bare_lf_count = bytebuf.count(b'\n') - crlf_count
    if crlf_count == bare_lf_count:
      return FileInfo.DEFAULT_LINESEP
    return '\r\n' if crlf_count > bare_lf_count else '\n'

  @staticmethod
  def guess_encoding(bytebuf):
    """Returns the first candidate encoding that decodes bytebuf cleanly.

    This is a heuristic: the answer is some supported encoding that can
    represent the file without losing information, not necessarily the
    encoding the author intended. That is usually good enough, because
    IWYU typically only adds ASCII content (or content pulled from the
    file itself). If no candidate works, DEFAULT_ENCODING is returned.
    """
    def decodes_as(codec):
      try:
        bytebuf.decode(codec, errors='strict')
      except UnicodeError:
        return False
      return True

    candidates = ['ascii', 'utf-8', 'windows-1250', 'windows-1252']
    # A leading UTF-8 BOM gets first shot with the BOM-aware codec.
    if bytebuf[0:3] == b'\xef\xbb\xbf':
      candidates.insert(0, 'utf-8-sig')

    for codec in candidates:
      if decodes_as(codec):
        return codec
    return FileInfo.DEFAULT_ENCODING
def _ReadFile(filename, fileinfo):
"""Read from filename and return a list of file lines."""
try:
with open(filename, 'rb') as f:
content = f.read()
# Call splitlines with True to keep the original line
# endings. Later in WriteFile, they will be used as-is.
# This will reduce spurious changes to the original files.
# The lines we add will have the linesep determined by
# FileInfo.
return content.decode(fileinfo.encoding).splitlines(True)
except (IOError, OSError) as why:
print("Skipping '%s': %s" % (filename, why))
return None
def _WriteFile(filename, fileinfo, file_lines):
"""Write the given file-lines to the file."""
try:
with open(filename, 'wb') as f:
# file_lines already have line endings, so join with ''.
content = ''.join(file_lines)
content = content.encode(fileinfo.encoding)
f.write(content)
except (IOError, OSError) as why:
print("Error writing '%s': %s" % (filename, why))
def PrintFileDiff(old_file_contents, new_file_contents):
  """Prints the unified diff of two files given as lists of lines.

  The two '---'/'+++' filename header lines are left out, and trailing
  whitespace is trimmed from every printed line. Nothing is printed
  when the diff is empty.
  """
  diff_lines = list(
      difflib.unified_diff(old_file_contents, new_file_contents))
  if len(diff_lines) < 2:
    return
  # Everything after the two header lines.
  body = [text.rstrip() for text in diff_lines[2:]]
  print('\n'.join(body))
def _MarkHeaderGuardIfPresent(file_lines):
  """Retypes the file's header-guard #ifdef line, if it has one.

  A header guard here means an #if/#ifdef/#ifndef that has nothing
  contentful before it or after its matching #endif. (It should also
  have no #elif inside, though that is not currently checked.) This
  covers the usual '#ifndef FOO_H\n#define FOO_H\n...\n#endif' of a .h
  file, and equally any other #ifdef that wraps the whole file, such as
  '#ifdef __linux\n...\n#endif'. Normally we would not put new
  #includes/fwd-declares inside an #ifdef, but when the #ifdef encloses
  the entire file we are willing to, so such lines get a special label.

  A file can have at most one such line. When found, its type becomes
  _HEADER_GUARD_RE, and if the line right after it looks like a #define
  that one becomes _HEADER_GUARD_DEFINE_RE.

  Arguments:
    file_lines: an array of LineInfo objects with .type filled in.
  """
  leading_noise = (_COMMENT_LINE_RE, _BLANK_LINE_RE, _PRAGMA_ONCE_LINE_RE)
  trailing_noise = (_COMMENT_LINE_RE, _BLANK_LINE_RE)

  # Step 1: the first line that is not deleted, blank, a comment or a
  # '#pragma once' is the candidate guard line.
  guard_index = None
  for index, line_info in enumerate(file_lines):
    if line_info.deleted or line_info.type in leading_noise:
      continue
    guard_index = index
    break
  if guard_index is None:
    # Reached EOF without seeing any contentful line.
    return
  if file_lines[guard_index].type != _IF_RE:
    # Not a header guard, nothing to mark.
    return

  # Step 2: walk forward, tracking #if nesting, to the matching #endif.
  nesting = 0
  endif_index = None
  for index in range(guard_index, len(file_lines)):
    line_info = file_lines[index]
    if line_info.deleted:
      continue
    if line_info.type == _IF_RE:
      nesting += 1
    elif line_info.type == _ENDIF_RE:
      nesting -= 1
      if nesting == 0:
        endif_index = index
        break
  if endif_index is None:
    return False  # Weird: this #ifdef is never closed

  # Step 3: only blank lines and comments may follow the #endif.
  for index in range(endif_index + 1, len(file_lines)):
    line_info = file_lines[index]
    if not line_info.deleted and line_info.type not in trailing_noise:
      return

  # All checks passed: label the guard line...
  file_lines[guard_index].type = _HEADER_GUARD_RE
  # ...and (usually) the '#define' that directly follows it.
  define_candidate = file_lines[guard_index + 1]
  if _HEADER_GUARD_DEFINE_RE.match(define_candidate.line):
    define_candidate.type = _HEADER_GUARD_DEFINE_RE
def _CalculateLineTypesAndKeys(file_lines, iwyu_record):
  """Fills file_line's type and key fields, where the 'type' is a regexp object.

  We match each line (line_info.line) against every regexp in
  _LINE_TYPES, and assign the first that matches, or None if none
  does.  We also use iwyu_record's some_include_lines and
  seen_forward_declare_lines to identify those lines.  In fact,
  that's the only data source we use for forward-declare lines.

  Sets file_line.type and file_line.is_first_line_of_this_type for
  each file_line in file_lines.  Additionally sets .key on #include
  lines, and .is_nested_forward_declaration on every line covered by
  iwyu_record.nested_forward_declare_lines.  Note that
  is_first_line_of_this_type is computed from the regexp-based type,
  before forward-declare lines are re-typed from iwyu_record.

  Arguments:
    file_lines: an array of LineInfo objects with .line fields filled in.
    iwyu_record: the IWYUOutputRecord struct for this source file.

  Raises:
    FixIncludesError: if iwyu_record's line-number information is
      inconsistent with what we see in the file.  (For instance,
      it says line 12 is an #include, but we say it's a blank line,
      or the file only has 11 lines.)
  """
  seen_types = set()
  in_c_style_comment = False
  in_allman_or_mixed_namespace = False
  for line_info in file_lines:
    if line_info.line is None:
      line_info.type = None
    elif _C_COMMENT_START_RE.match(line_info.line):
      # Note: _C_COMMENT_START_RE only matches a comment at the start
      # of a line.  Comments in the middle of a line are ignored.
      # This can cause problems with multi-line comments that start
      # in the middle of the line, but that's hopefully quite rare.
      # TODO(csilvers): check for that case.
      m = _C_COMMENT_END_RE.match(line_info.line)
      if not m:    # comment continues onto future lines
        line_info.type = _COMMENT_LINE_RE
        in_c_style_comment = True
      elif not m.group(1):   # comment extends across entire line (only)
        line_info.type = _COMMENT_LINE_RE
      else:                  # comment takes only part of line, treat as content
        # TODO(csilvers): this mis-diagnoses lines like '/*comment*/class Foo;'
        line_info.type = None
    elif in_c_style_comment and _C_COMMENT_END_RE.match(line_info.line):
      # Last line of a multi-line C-style comment.
      line_info.type = _COMMENT_LINE_RE
      in_c_style_comment = False
    elif in_c_style_comment:
      # Interior line of a multi-line C-style comment.
      line_info.type = _COMMENT_LINE_RE
    elif (in_allman_or_mixed_namespace and
          _NAMESPACE_CONTINUE_ALLMAN_MIXED_RE.match(line_info.line)):
      in_allman_or_mixed_namespace = False
      line_info.type = _NAMESPACE_CONTINUE_ALLMAN_MIXED_RE
    else:
      for type_re in _LINE_TYPES:
        # header-guard-define-re has a two-part decision criterion: it
        # matches the RE, *and* it comes after a header guard line.
        # That's too complex to figure out now, so we skip over it now
        # and fix it up later in _MarkHeaderGuardIfPresent().
        if type_re in (_HEADER_GUARD_DEFINE_RE,):
          continue
        m = type_re.match(line_info.line)
        if m:
          line_info.type = type_re
          if type_re == _INCLUDE_RE:
            line_info.key = m.group(1)   # get the 'key' for the #include.
          elif type_re in (_NAMESPACE_START_ALLMAN_RE,
                           _NAMESPACE_START_MIXED_RE):
            # set in_allman_or_mixed_namespace to true to find the next {
            in_allman_or_mixed_namespace = True
          break
      else:    # for/else
        line_info.type = None   # means we didn't match any re

    line_info.is_first_line_of_this_type = (line_info.type not in seen_types)
    seen_types.add(line_info.type)

  # Now double-check against iwyu that we got all the #include lines right.
  for line_number in iwyu_record.some_include_lines:
    # Guard the index first, so a stale iwyu line number is reported as a
    # FixIncludesError (as documented) instead of escaping as an IndexError.
    if line_number >= len(file_lines):
      raise FixIncludesError('iwyu line number %s:%d is past file-end'
                             % (iwyu_record.filename, line_number))
    if file_lines[line_number].type != _INCLUDE_RE:
      raise FixIncludesError('iwyu line number %s:%d (%s) is not an #include'
                             % (iwyu_record.filename, line_number,
                                file_lines[line_number].line))

  # We depend entirely on the iwyu_record for the forward-declare lines.
  for (start_line, end_line) in iwyu_record.seen_forward_declare_lines:
    for line_number in range(start_line, end_line):
      if line_number >= len(file_lines):
        raise FixIncludesError('iwyu line number %s:%d is past file-end'
                               % (iwyu_record.filename, line_number))
      file_lines[line_number].type = _FORWARD_DECLARE_RE

  for (start_line, end_line) in iwyu_record.nested_forward_declare_lines:
    for line_number in range(start_line, end_line):
      if line_number >= len(file_lines):
        raise FixIncludesError('iwyu line number %s:%d is past file-end'
                               % (iwyu_record.filename, line_number))
      file_lines[line_number].is_nested_forward_declaration = True

  # While we're at it, let's do a bit more sanity checking on iwyu_record.
  for line_number in iwyu_record.lines_to_delete:
    if line_number >= len(file_lines):
      raise FixIncludesError('iwyu line number %s:%d is past file-end'
                             % (iwyu_record.filename, line_number))
    elif file_lines[line_number].type not in (_INCLUDE_RE,
                                              _FORWARD_DECLARE_RE):
      raise FixIncludesError('iwyu line number %s:%d (%s) is not'
                             ' an #include or forward declare'
                             % (iwyu_record.filename, line_number,
                                file_lines[line_number].line))

  # Check if this file has a header guard, which for our purposes is
  # an #ifdef (or #if) that covers an entire source file.  Usually
  # this will be a standard .h header-guard, but it could be something
  # like '#if __linux/#endif'.  The point here is that if an #ifdef
  # encloses the entire file, then we are willing to put new
  # #includes/fwd-declares inside the #ifdef (which normally we
  # wouldn't do).  So we mark such #ifdefs with a special label.
  _MarkHeaderGuardIfPresent(file_lines)
def _PreviousNondeletedLine(file_lines, line_number):
  """Returns the index of the closest earlier line not marked deleted.

  Arguments:
    file_lines: an array of objects exposing a .deleted flag.
    line_number: an index into file_lines; the search starts just before it.

  Returns:
    The largest index below line_number whose entry is not deleted,
    or None when every earlier entry is deleted (or there is none).
  """
  # Walk backwards from line_number - 1 down to 0; first survivor wins.
  earlier_indices = range(line_number - 1, -1, -1)
  return next(
      (index for index in earlier_indices if not file_lines[index].deleted),
      None)
def _NextNondeletedLine(file_lines, line_number):
  """Returns the index of the closest later line not marked deleted.

  Arguments:
    file_lines: an array of objects exposing a .deleted flag.
    line_number: an index into file_lines; the search starts just after it.

  Returns:
    The smallest index above line_number whose entry is not deleted,
    or None when every later entry is deleted (or there is none).
  """
  # Walk forwards from line_number + 1 to the end; first survivor wins.
  later_indices = range(line_number + 1, len(file_lines))
  return next(
      (index for index in later_indices if not file_lines[index].deleted),
      None)
def _LineNumberStartingPrecedingComments(file_lines, line_number):
  """Finds where the comment block directly above a line begins.

  Scans upward from line_number over the contiguous run of lines whose
  type is _COMMENT_LINE_RE.  If such a run exists, the index of its
  first line is returned; if there is no such run, line_number itself
  is returned.

  Special case: when the run of comments reaches all the way up to the
  first line of the file (line 1), the comments are treated as
  top-of-file comments not associated with any source line, and
  line_number is returned unchanged.

  Arguments:
    file_lines: an array of LineInfo objects, with .type fields filled in.
    line_number: an index into file_lines.

  Returns:
    The first line number of the preceding comments, or line_number
    if there are no preceding comments or they run to the top of the
    file.
  """
  first_comment = line_number
  while first_comment > 0:
    if file_lines[first_comment - 1].type != _COMMENT_LINE_RE:
      break
    first_comment -= 1
  # A run that reaches line 1 (or the slot before it) is a top-of-file
  # comment block, so it is not attached to the given line.
  return line_number if first_comment <= 1 else first_comment
def _CalculateMoveSpans(file_lines, forward_declare_spans):
  """Populates the move_span field of #include and forward-declare lines.

  A 'move span' is the half-open line range [start, end) made up of
  one #include or forward-declare together with the comment lines
  directly above it.  It is the unit that gets moved (or deleted) as a
  whole.  Every line inside such a range gets the same
  (start, end) tuple stored in .move_span; lines outside any range are
  not touched by this function.

  Arguments:
    file_lines: an array of LineInfo objects, with .type fields filled in.
    forward_declare_spans: an iterable of [start_line, end_line)
      line-number pairs, one per forward-declare.  In practice this
      comes from iwyu_record.seen_forward_declare_lines.
  """
  # Pass 1: each #include line, extended upward over its comments.
  for include_index, line_info in enumerate(file_lines):
    if line_info.type != _INCLUDE_RE:
      continue
    first = _LineNumberStartingPrecedingComments(file_lines, include_index)
    past_end = include_index + 1
    for covered in range(first, past_end):
      file_lines[covered].move_span = (first, past_end)

  # Pass 2: forward-declares, whose extents are handed to us by the caller.
  for (declared_begin, past_end) in forward_declare_spans:
    first = _LineNumberStartingPrecedingComments(file_lines, declared_begin)
    for covered in range(first, past_end):
      file_lines[covered].move_span = (first, past_end)
def _ContainsBarrierInclude(file_lines, line_range):
  """Tells whether a half-open line range holds a live barrier line.

  Arguments:
    file_lines: an array of LineInfo objects.
    line_range: a (start, end) pair describing the range [start, end).

  Returns:
    True iff some non-deleted line in the range has text matched by
    _BARRIER_INCLUDES.search(); False otherwise.
  """
  return any(
      not file_lines[index].deleted
      and _BARRIER_INCLUDES.search(file_lines[index].line)
      for index in range(*line_range))
def _LinesAreAllBlank(file_lines, start_line, end_line):
  """Tells whether [start_line, end_line) holds only blank or deleted lines.

  Arguments:
    file_lines: an array of LineInfo objects, with .type fields filled in.
    start_line: first index of the range (inclusive).
    end_line: last index of the range (exclusive).

  Returns:
    True iff every line in the range is either deleted or has type
    _BLANK_LINE_RE.  An empty range yields True.
  """
  return all(
      file_lines[index].deleted or file_lines[index].type == _BLANK_LINE_RE
      for index in range(start_line, end_line))
def _CalculateReorderSpans(file_lines):
  """Populates the reorder_span field of lines inside move spans.

  A 'reorder span' is a half-open range of lines containing nothing
  but #includes and forward-declares (plus their attached comments and
  any blank lines between them) -- no "real code".  Items may be
  reordered freely within one reorder span.

  Reorder spans are built by gluing together consecutive move spans
  that are separated only by blank (or deleted) lines.

  The exception is a move span matching _BARRIER_INCLUDES: it acts as
  a barrier that nothing may be reordered across, which is achieved by
  giving it a reorder span of its own.  (This is for #includes that
  need other #includes to come before them.)

  Every line from the start to the end of a reorder span -- including
  blank lines in between move spans -- has .reorder_span set to the
  (start, end) tuple.  Lines outside every reorder span are left
  untouched.

  Arguments:
    file_lines: an array of LineInfo objects with .type and .move_span
      fields filled in.
  """
  # Move spans are disjoint; dedupe them (each line of a span carries
  # the same tuple) and put them in file order.
  spans = sorted({info.move_span for info in file_lines
                  if info.move_span is not None})
  span_count = len(spans)

  first = 0
  while first < span_count:
    last = first
    # A barrier span always stands alone.  Any other span absorbs its
    # successors for as long as only blank lines separate them and the
    # successor is not itself a barrier.
    if not _ContainsBarrierInclude(file_lines, spans[first]):
      while last + 1 < span_count:
        gap_is_blank = _LinesAreAllBlank(file_lines,
                                         spans[last][1], spans[last + 1][0])
        if (not gap_is_blank
            or _ContainsBarrierInclude(file_lines, spans[last + 1])):
          break
        last += 1

    # Stamp every line in the merged range with the range's extent.
    extent = (spans[first][0], spans[last][1])
    for line_number in range(*extent):
      file_lines[line_number].reorder_span = extent
    first = last + 1
def ParseOneFile(f, iwyu_record):
  """Reads the lines of one file and classifies each of them.

  Every line yielded by f is wrapped in a LineInfo, and then the
  line types/keys, move spans and reorder spans are computed in that
  order.

  Arguments:
    f: an iterable object returning lines from a file.
    iwyu_record: the IWYUOutputRecord struct for this source file.

  Returns:
    An array of LineInfo objects.  Element 0 is a dummy entry built
    from None, so the first line of the file lives at retval[1],
    matching the way iwyu counts line numbers.
  """
  # Slot 0 is a placeholder so list indices line up with 1-based line numbers.
  parsed_lines = [LineInfo(None)]
  parsed_lines.extend(LineInfo(text) for text in f)

  _CalculateLineTypesAndKeys(parsed_lines, iwyu_record)
  forward_declare_spans = iwyu_record.seen_forward_declare_lines
  _CalculateMoveSpans(parsed_lines, forward_declare_spans)
  _CalculateReorderSpans(parsed_lines)
  return parsed_lines
def _DeleteEmptyNamespaces(file_lines):
"""Delete namespaces with nothing in them.
Empty namespaces could be caused by transformations that removed
forward-declarations:
namespace foo {
class Myclass;
}
->
namespace foo {
}