forked from wosayttn/formatting
-
Notifications
You must be signed in to change notification settings - Fork 0
/
formatting.py
203 lines (176 loc) · 8.12 KB
/
formatting.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
# -*- coding: UTF-8 -*-
# COPYRIGHT (C) 2021, Meco Jianting Man <[email protected]>
#
# SPDX-License-Identifier: MIT License
#
# Change Logs:
# Date Author Notes
# 2021-03-02 Meco Man The first version
# 2021-03-04 Meco Man 增加统一转换成UTF-8编码格式功能
# 2021-03-06 Meco Man 增加人工介入处理交互功能
# 2021-03-07 Meco Man 增加将RT-Thread版权信息的截至年份修改至今年功能
# 2021-03-14 Meco Man 增加将上海睿赛德版权信息的截至年份修改至今年功能
# 2021-06-07 iysheng Add support with format single file and parse command line parameters
# 2021-08-24 陈迎春 解决格式化脚本需要和被格式化文件放在同一个磁盘的问题
# 2021-08-29 Meco Man 优化文件后缀名的判断
# 本文件会自动对指定路径下的所有文件包括子文件夹的文件(.c/.h/.cpp/.hpp)进行扫描
# 1)将源文件编码统一为UTF-8
# 2)将TAB键替换为空格
# 3)将每行末尾多余的空格删除,并统一换行符为'\n'
# 4)将RT-Thread版权信息的截至年份修改至今年
# 5)将上海睿赛德版权信息的截至年份修改至今年
# 使用时只需要双击本文件,输入要扫描的文件夹路径即可
# 不能保证100%全部成功转换为UTF-8,有一些编码特殊或识别不准确会在终端打印信息,需人工转换
# 欢迎对本文件的功能继续做出补充,欢迎提交PR
import os
import sys
import re
import chardet
import datetime
# 用空格代替TAB键
# 这里并不是简单的将TAB替换成4个空格
# 空格个数到底是多少需要计算,因为TAB制表本身有自动对齐的功能
def tab2spaces(line):
list_str = list(line) # 字符串打散成列表,方便操作
i = list_str.count('\t')
while i > 0:
ptr = list_str.index('\t')
del list_str[ptr]
space_need_to_insert = 4 - (ptr % 4)
j = 0
while j < space_need_to_insert:
list_str.insert(ptr, ' ')
j = j + 1
i = i - 1
line = ''.join(list_str) # 列表恢复成字符串
return line
# 删除每行末尾多余的空格 统一使用\n作为结尾
def formattail(line):
line = line.rstrip()
line = line + '\n'
return line
#搜索RT-Thread版权信息的截至年份修改至今年
def change_rtthread_copyright_year(line):
search_result = re.search('200[0-9]-20[0-9][0-9]', line, flags=0) # 搜索200x-20xx字样
if search_result != None:
if re.search('RT-Thread', line, flags=0) != None: # 同时可以在本行中搜索到‘RT-Thread’字样
end = search_result.end()
str_year = str(datetime.datetime.now().year)
line = line.replace(line[end-4:end], str_year)# 将20xx替换为今年
return line
#搜索Real-Thread睿赛德版权信息的截至年份修改至今年
def change_realthread_copyright_year(line):
search_result = re.search('20[0-9][0-9]-20[0-9][0-9]', line, flags=0) # 搜索20xx-20xx字样
if search_result != None:
if re.search('Real-Thread', line, flags=0) != None: # 同时可以在本行中搜索到‘Real-Thread’字样
end = search_result.end()
str_year = str(datetime.datetime.now().year)
line = line.replace(line[end-4:end], str_year)# 将20xx替换为今年
return line
# 对单个文件进行格式整理
def format_codes(filename):
try:
filepath = os.path.dirname(filename)
# 将temp_file放在和filename相同的路径下
temp_file = filepath + "temp"
file = open(filename, 'r', encoding='utf-8')
file_temp = open(temp_file, 'w', encoding='utf-8')
line_num = 0
for line in file:
line = tab2spaces(line)
line = formattail(line)
line_num = line_num + 1
if line_num < 20: #文件前20行对版权头注释进行扫描,找到截至年份并修改至今年
line = change_rtthread_copyright_year(line)
line = change_realthread_copyright_year(line)
file_temp.write(line)
file_temp.close()
file.close()
os.remove(filename)
os.rename(temp_file, filename)
except UnicodeDecodeError:
print("解码失败,该文件处理失败" + filename)
file_temp.close()
file.close()
except UnicodeEncodeError:
print("编码失败,该文件处理失败" + filename)
file_temp.close()
file.close()
def get_encode_info(file):
encoding = None
with open(file, 'rb') as f:
encode_info = chardet.detect(f.read())
encoding = encode_info['encoding']
confidence = encode_info['confidence']
# 对编码的判断可靠性小于90%不予以处理,需要人工介入处理
if confidence < 0.90:
if encoding != None:
print('--------------------------------------------------------------------------')
print('未处理,需人工确认(Unprocessed, manual confirmation is required): ' + encoding + ': ' + file) # 需要人工确认
print('自动判读结果仅供参考:')
print(encode_info)
man_result = input('1.GB2312\n2.Windows-1252\n3.utf-8\n4.手动输入其他类型编码(Manually enter other type codes)\n5.略过本文件(skip this document)\n请输入人工研判结果(Please enter the manual judgment result): ')
if man_result == '1':
encoding = 'GB2312'
elif man_result == '2':
encoding == 'Windows-1252'
elif man_result == '3':
encoding == 'utf-8'
elif man_result == '4':
encoding = input('请输入编码类型(Please enter code type): ')
elif man_result == '5':
print('本文件略过,继续处理其他文件(Skip this document and continue processing other documents)...')
encoding = None
else:
print('输入参数无效,本文件略过,继续处理其他文件(The input parameters are invalid, skip this file and continue to process other files)...')
return encoding
# 将单个文件转为UTF-8编码
def convert_to_utf_8(path):
encoding = get_encode_info(path)
if encoding == None:
return False # 转换失败
if encoding == 'utf-8': # 若检测到编码为UTF-8则直接返回成功
return True
try:
file = open(path, 'rb+')
data = file.read()
string = data.decode(encoding)
utf = string.encode('utf-8')
file.seek(0)
file.write(utf)
file.truncate()
file.close()
return True # 转换成功
except UnicodeDecodeError:
print("解码失败,该文件处理失败" + path)
return False
except UnicodeEncodeError:
print("编码失败,该文件处理失败" + path)
return False
def formatfile(file):
if os.path.splitext(file)[1] in ['.c', '.h', '.cpp', '.hpp']: #处理.c/.h/.cpp/.hpp文件
# if os.path.splitext(file)[1] in ['.md']: #处理markdown文档
# if os.path.split(file)[1] in ['Kconfig', 'SConscript', 'SConstruct']: #处理Kconfig Sconscript
# if os.path.splitext(file)[1] in ['.json']: #处理.json文件
if convert_to_utf_8(file) == True: #先把这个文件转为UTF-8编码,1成功
format_codes(file) #再对这个文件进行格式整理
# 递归扫描目录下的所有文件
def traversalallfile(path):
filelist = os.listdir(path)
for file in filelist:
filepath = os.path.join(path, file)
if os.path.isdir(filepath):
traversalallfile(filepath)
elif os.path.isfile(filepath):
formatfile(filepath)
def formatfiles():
if len(sys.argv) > 1:
worktarget = sys.argv[1] # use the first command line parameter as worktarget
else:
worktarget = input('Please enter work path or file to format: ')
if os.path.isdir(worktarget):
traversalallfile(worktarget)
else:
formatfile(worktarget)
if __name__ == '__main__':
formatfiles()