# [patch metadata, preserved verbatim] diff --git a/IDAGenDFG.py b/IDAGenDFG.py new file mode 100644 index 0000000..2ead6d2 --- /dev/null +++ b/IDAGenDFG.py @@ -0,0 +1,49 @@
"""IDAPython batch script: run the miasm def-use analysis on every function of ``.text``.

Invocation (the script arguments travel inside the ``-S`` option)::

    /path/to/ida -A -Lida.log -S"IDAGenDFG.py -o <output_dir>" /path/to/binary
"""
import idaapi
import idautils
import idc
import ida_pro
import ida_auto
import os
import sys
from libdataflow import ida_dataflow_analysis
from argparse import ArgumentParser


def main(OUTPUT_DIR: str) -> None:
    """Analyze every non-library function located in the ``.text`` segment.

    Args:
        OUTPUT_DIR: Directory handed to ``ida_dataflow_analysis`` for its
            output files. It is created when missing.

    A function whose analysis raises is reported on stdout and skipped, so a
    single failure does not abort the whole batch.
    """
    os.makedirs(OUTPUT_DIR, exist_ok=True)

    text_seg = next(
        (seg for seg in idautils.Segments() if idc.get_segm_name(seg) == ".text"),
        None,
    )
    if text_seg is None:
        # Without a .text segment there is no address range to enumerate;
        # say so instead of finishing silently with no output.
        print('No .text segment found, nothing to analyze')
        return

    text_start = idc.get_segm_start(text_seg)
    text_end = idc.get_segm_end(text_seg)

    for func in idautils.Functions(text_start, text_end):
        func_name = idc.get_func_name(func)

        # Ignore library code
        flags = idc.get_func_attr(func, idc.FUNCATTR_FLAGS)
        if flags & idc.FUNC_LIB:
            print(hex(func), "FUNC_LIB", func_name)
            continue

        try:
            ida_dataflow_analysis(func, func_name, OUTPUT_DIR, defuse_only=True)
        except Exception as e:
            # Deliberate best-effort: keep going with the remaining functions.
            print('Skip function {} due to dataflow analysis error: {}'.format(func_name, e))


if __name__ == '__main__':
    if len(idc.ARGV) < 2:
        print('\n\nGenerating DFG & Def-Use Graph with IDA Pro and MIASM')
        print('\tNeed to specify the output dir with -o option')
        print('\tUsage: /path/to/ida -A -Lida.log -S"{} -o <output_dir>" /path/to/binary\n\n'.format(idc.ARGV[0]))
        ida_pro.qexit(1)

    parser = ArgumentParser(description="IDAPython script for generating dataflow graph of each function in the given binary")
    # const mirrors default so that a bare "-o" does not produce None.
    parser.add_argument("-o", "--output_dir", help="Output dir", default='./outputs', const='./outputs', nargs='?')

    try:
        # Parse the same vector that was validated above (idc.ARGV) rather
        # than relying on sys.argv mirroring it.
        args = parser.parse_args(idc.ARGV[1:])
    except SystemExit as exc:
        # argparse reports bad options / -h through SystemExit; hand the code
        # to IDA so the batch run ends with a matching exit status.
        # NOTE(review): assumes qexit is the right way to end a -A run here.
        ida_pro.qexit(exc.code if isinstance(exc.code, int) else 1)

    ida_auto.auto_wait()

    main(args.output_dir)

    ida_pro.qexit(0)
# [patch text, preserved verbatim] \ No newline at end of file diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..3d64fce --- /dev/null +++ b/LICENSE @@ -0,0 +1,339 @@ + GNU GENERAL PUBLIC LICENSE + Version 2, June 1991 + + Copyright (C) 1989, 1991 Free Software
Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. This +General Public License applies to most of the Free Software +Foundation's software and to any other program whose authors commit to +using it. (Some other Free Software Foundation software is covered by +the GNU Lesser General Public License instead.) You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. + + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. And you must show them these terms so they know their +rights. + + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. 
+ + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + Finally, any free program is threatened constantly by software +patents. We wish to avoid the danger that redistributors of a free +program will individually obtain patent licenses, in effect making the +program proprietary. To prevent this, we have made it clear that any +patent must be licensed for everyone's free use or not licensed at all. + + The precise terms and conditions for copying, distribution and +modification follow. + + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License applies to any program or other work which contains +a notice placed by the copyright holder saying it may be distributed +under the terms of this General Public License. The "Program", below, +refers to any such program or work, and a "work based on the Program" +means either the Program or any derivative work under copyright law: +that is to say, a work containing the Program or a portion of it, +either verbatim or with modifications and/or translated into another +language. (Hereinafter, translation is included without limitation in +the term "modification".) Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the +Program (independent of having been made by running the Program). +Whether that is true depends on what the Program does. + + 1. 
You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. + +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a fee. + + 2. You may modify your copy or copies of the Program or any portion +of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) You must cause the modified files to carry prominent notices + stating that you changed the files and the date of any change. + + b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any + part thereof, to be licensed as a whole at no charge to all third + parties under the terms of this License. + + c) If the modified program normally reads commands interactively + when run, you must cause it, when started running for such + interactive use in the most ordinary way, to print or display an + announcement including an appropriate copyright notice and a + notice that there is no warranty (or else, saying that you provide + a warranty) and that users may redistribute the program under + these conditions, and telling the user how to view a copy of this + License. (Exception: if the Program itself is interactive but + does not normally print such an announcement, your work based on + the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. 
If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. + +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of Sections + 1 and 2 above on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three + years, to give any third party, for a charge no more than your + cost of physically performing source distribution, a complete + machine-readable copy of the corresponding source code, to be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer + to distribute corresponding source code. 
(This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form with such + an offer, in accord with Subsection b above.) + +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. + +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + + 4. You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense or distribute the Program is +void, and will automatically terminate your rights under this License. +However, parties who have received copies, or rights, from you under +this License will not have their licenses terminated so long as such +parties remain in full compliance. + + 5. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. These actions are +prohibited by law if you do not accept this License. 
Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + + 6. Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties to +this License. + + 7. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Program at all. For example, if a patent +license would not permit royalty-free redistribution of the Program by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. 
+ +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 8. If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 9. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +this License, you may choose any version ever published by the Free Software +Foundation. + + 10. 
If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. 
+ + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + ida-dfg + Copyright (C) 2022 wenyu zhu + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this +when it starts in an interactive mode: + + Gnomovision version 69, Copyright (C) year name of author + Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. 
Of course, the commands you use may +be called something other than `show w' and `show c'; they could even be +mouse-clicks or menu items--whatever suits your program. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the program + `Gnomovision' (which makes passes at compilers) written by James Hacker. + + <signature of Ty Coon>, 1 April 1989 + Ty Coon, President of Vice + +This General Public License does not permit incorporating your program into +proprietary programs. If your program is a subroutine library, you may +consider it more useful to permit linking proprietary applications with the +library. If this is what you want to do, use the GNU Lesser General +Public License instead of this License. diff --git a/README.md b/README.md new file mode 100644 index 0000000..8387752 --- /dev/null +++ b/README.md @@ -0,0 +1,42 @@ +# ida-dfg + +IDA Pro data-flow graph generator + +Tested with IDA Pro 7.6 and miasm 7ee593d + +## libdataflow.py + +封装了两个核心接口给其他脚本用 + - `ida_dataflow_analysis`: 面向IDA + MIASM的场景 + - `miasm_dataflow_analysis`: 单独使用,不需要IDA Pro + +## IDAGenDFG.py + +IDAPython调用的脚本 + +`/path/to/ida -A -Lida.log -S"path/to/IDAGenDFG.py -o <output_dir>" /path/to/binary` + +## deprecated/graph_dataflow.py + +新版miasm支持的DFG/ReachingDefinition/DefUse分析 + +## deprecated/libdfg.py + +代码升级 & debug工作停止,因为新版miasm自身支持dfg生成。 + +但是这部分代码的价值在于学习如何将miasm用到IDAPython里,详见`dataflow_analysis`函数。 + + + +## miasm的一些核心概念: + - machine类: 定义架构、反汇编引擎、lifter + - LocationDB类:各类数据结构的loc_key(unique id),例如AsmBlock, IRBlock的loc_key;以及定义了offset和loc_key相互转换的函数 + - Instruction类:可以在miasm.core.cpu内查看其成员函数、变量 + - AsmCFG类、AsmBlock类:汇编控制流图、基本块 + - IRBlock类、AssignBlock类:AsmBlock经Lifter翻译得到IRBlock,每一个IRBlock有若干个AssignBlock + * 每个AssignBlock对应一条IR赋值语句(src -> dst),同时也可以对应回一条汇编指令(assignblk.instr) + +## miasm的局限性 + + - 反汇编较慢 + - 无法处理80bit浮点数 
# [patch metadata, preserved verbatim] diff --git a/libdataflow.py b/libdataflow.py new file mode 100644 index 0000000..46c14b9 --- /dev/null +++ b/libdataflow.py @@ -0,0 +1,276 @@
"""Data-flow graph (DFG) and def-use graph generation on top of miasm.

Public entry points:

- ``ida_dataflow_analysis``: disassembles through IDA's database.
- ``miasm_dataflow_analysis``: disassembles a binary file given by path.

NOTE(review): the module-level imports of ``utils.guess_machine`` and
``bin_stream_ida`` presumably require the IDA runtime, which would prevent
importing this module outside IDA even for ``miasm_dataflow_analysis`` --
confirm and make them lazy if so.
"""
import os
from future.utils import viewitems, viewvalues
from utils import guess_machine

from miasm.analysis.binary import Container
from miasm.analysis.machine import Machine
from miasm.expression.expression import get_expr_mem
from miasm.analysis.data_analysis import inter_block_flow #, intra_block_flow_raw
from miasm.core.graph import DiGraph
from miasm.ir.symbexec import SymbolicExecutionEngine
from miasm.analysis.data_flow import DeadRemoval, ReachingDefinitions, DiGraphDefUse
from miasm.core.locationdb import LocationDB
from miasm.core.bin_stream_ida import bin_stream_ida


def intra_block_flow_symb(lifter, _, flow_graph, irblock, in_nodes, out_nodes):
    """
    Create data flow for an IR block from its symbolically executed state.

    Same callback signature as ``intra_block_flow_raw`` (the ircfg argument is
    ignored). Fills ``flow_graph``, ``in_nodes`` and ``out_nodes`` in place.
    Not selected by either entry point of this module at the moment.

    NOTE(review): the statement nesting below was reconstructed from
    whitespace-mangled text -- verify against the original file.
    NOTE(review): ``out`` is unpacked as pairs in the first loop but iterated
    as single items in the second; if ``sb.modified`` returns a one-shot
    iterator the second loop would also see nothing. Confirm before use.
    """
    symbols_init = lifter.arch.regs.regs_init.copy()
    sb = SymbolicExecutionEngine(lifter, symbols_init)
    sb.eval_updt_irblock(irblock)
    print('*' * 40)
    print(irblock)

    out = sb.modified(mems=False)
    current_nodes = {}
    # Gen mem arg to mem node links
    for dst, src in out:
        src = sb.eval_expr(dst)
        for n in [dst, src]:

            # NOTE(review): the set is re-created per iteration, so only the
            # memory expressions of the last element (src) survive.
            all_mems = set()
            all_mems.update(get_expr_mem(n))

        for n in all_mems:
            node_n_w = (irblock.loc_key, 0, n)
            if not n == src:
                continue
            o_r = n.ptr.get_r(mem_read=False, cst_read=True)
            for i, n_r in enumerate(o_r):
                if n_r in current_nodes:
                    node_n_r = current_nodes[n_r]
                else:
                    node_n_r = (irblock.loc_key, i, n_r)
                if not n_r in in_nodes:
                    in_nodes[n_r] = node_n_r
                flow_graph.add_uniq_edge(node_n_r, node_n_w)

    # Gen data flow links
    for dst in out:
        src = sb.eval_expr(dst)
        nodes_r = src.get_r(mem_read=False, cst_read=True)
        nodes_w = set([dst])
        for n_r in nodes_r:
            if n_r in current_nodes:
                node_n_r = current_nodes[n_r]
            else:
                node_n_r = (irblock.loc_key, 0, n_r)
            if not n_r in in_nodes:
                in_nodes[n_r] = node_n_r

            flow_graph.add_node(node_n_r)
            for n_w in nodes_w:
                node_n_w = (irblock.loc_key, 1, n_w)
                out_nodes[n_w] = node_n_w

                flow_graph.add_node(node_n_w)
                flow_graph.add_uniq_edge(node_n_r, node_n_w)


def intra_block_flow_raw(lifter, ircfg, flow_graph, irb, in_nodes, out_nodes):
    """
    Create data flow for an irbloc using raw IR expressions

    Graph nodes are ``(hex instruction offset, assignblk index, expression)``
    tuples. ``in_nodes`` receives the first node created for each expression
    read before being written in this block; ``out_nodes`` receives the latest
    node written for each destination. Both dicts and ``flow_graph`` are
    updated in place; ``lifter`` and ``ircfg`` are not used here.
    """
    current_nodes = {}
    for i, assignblk in enumerate(irb):
        dict_rw = assignblk.get_rw(cst_read=True)
        # Values written by earlier assignblks are the current definitions.
        current_nodes.update(out_nodes)

        # gen mem arg to mem node links
        all_mems = set()
        for node_w, nodes_r in viewitems(dict_rw):
            for n in nodes_r.union([node_w]):
                all_mems.update(get_expr_mem(n))
            if not all_mems:
                continue

            for n in all_mems:
                node_n_w = (hex(assignblk.instr.offset), i, n)
                if not n in nodes_r:
                    continue
                # Link every expression used in the address to the memory read.
                o_r = n.ptr.get_r(mem_read=False, cst_read=True)
                for n_r in o_r:
                    if n_r in current_nodes:
                        node_n_r = current_nodes[n_r]
                    else:
                        node_n_r = (hex(assignblk.instr.offset), i, n_r)
                        current_nodes[n_r] = node_n_r
                        in_nodes[n_r] = node_n_r
                    flow_graph.add_uniq_edge(node_n_r, node_n_w)

        # gen data flow links
        for node_w, nodes_r in viewitems(dict_rw):
            for n_r in nodes_r:
                if n_r in current_nodes:
                    node_n_r = current_nodes[n_r]
                else:
                    node_n_r = (hex(assignblk.instr.offset), i, n_r)
                    current_nodes[n_r] = node_n_r
                    in_nodes[n_r] = node_n_r

                flow_graph.add_node(node_n_r)

                # Written values get index i + 1 so they differ from reads
                # of the same expression in the same assignblk.
                node_n_w = (hex(assignblk.instr.offset), i + 1, node_w)
                out_nodes[node_w] = node_n_w

                flow_graph.add_node(node_n_w)
                flow_graph.add_uniq_edge(node_n_r, node_n_w)


def node2str(node):
    """Render a 3-tuple graph node as the label text used in the dot output."""
    out = "%s,%s\\l\\\n%s" % node
    return out


def gen_function_data_flow_graph(lifter, ircfg, ad, block_flow_cb) -> DiGraph:
    '''
    generate data flow graph for a given function

    Args:
        lifter: miasm lifter that produced ``ircfg``.
        ircfg: IR control-flow graph of the function.
        ad: offset of the function's entry block.
        block_flow_cb: per-block callback, e.g. ``intra_block_flow_raw``.

    Raises:
        AssertionError: if no IR block of ``ircfg`` is located at ``ad``.
    '''
    irblock_0 = None
    for irblock in viewvalues(ircfg.blocks):
        offset = ircfg.loc_db.get_location_offset(irblock.loc_key)
        if offset == ad:
            irblock_0 = irblock
            break
    assert irblock_0 is not None, 'no IR block found at {}'.format(hex(ad))

    flow_graph = DiGraph()
    flow_graph.node2str = node2str

    # One in/out node table per block, filled by the intra-block pass and
    # then consumed by the inter-block pass.
    irb_in_nodes = {label: {} for label in ircfg.blocks}
    irb_out_nodes = {label: {} for label in ircfg.blocks}

    for label, irblock in viewitems(ircfg.blocks):
        block_flow_cb(lifter, ircfg, flow_graph, irblock, irb_in_nodes[label], irb_out_nodes[label])

    inter_block_flow(lifter, ircfg, flow_graph, irblock_0.loc_key, irb_in_nodes, irb_out_nodes)

    return flow_graph


def _write_text(path: str, text: str) -> None:
    """Write ``text`` to ``path`` and close the file, even if writing fails."""
    with open(path, 'w') as handle:
        handle.write(text)


def _analyze_function(machine, bin_stream, loc_db, function_addr: int, function_name: str,
                      output_dir: str, defuse_only: bool) -> None:
    """Disassemble one function, lift it to IR and write all output files.

    Shared by both public entry points. Files written into ``output_dir``
    (created if missing), each prefixed with ``function_name``:
    ``.asm2ir``, ``_defuse.dot``, ``_LocKeyIdx2InstrOffset.map`` and, unless
    ``defuse_only`` is set, ``_dfg.dot``.
    """
    os.makedirs(output_dir, exist_ok=True)

    mdis = machine.dis_engine(bin_stream, loc_db=loc_db, dont_dis_nulstart_bloc=True)
    mdis.follow_call = True
    lifter = machine.lifter_model_call(loc_db=loc_db)

    print('disassembling function: {}:{}'.format(hex(function_addr), function_name))
    asmcfg = mdis.dis_multiblock(function_addr)

    print('generating IR...')
    ircfg = lifter.new_ircfg_from_asmcfg(asmcfg)
    # TODO: running DeadRemoval(lifter) on ircfg drops part of the IR;
    # investigate before enabling it.

    with open(os.path.join(output_dir, '{}.asm2ir'.format(function_name)), 'w') as f:
        f.write('\tOFFSET\t| ASM\t| SRC -> DST\n')
        for irblock in ircfg.blocks.values():
            for assignblk in irblock:
                for dst, src in assignblk.iteritems():
                    f.write('\t{}\t| {}\t| {} -> {}\n'.format(hex(assignblk.instr.offset), assignblk.instr, src, dst))

    if not defuse_only:
        block_flow_cb = intra_block_flow_raw  # intra_block_flow_symb is the alternative

        dfg = gen_function_data_flow_graph(lifter, ircfg, function_addr, block_flow_cb)
        _write_text(os.path.join(output_dir, '{}_dfg.dot'.format(function_name)), dfg.dot())

    reaching_defs = ReachingDefinitions(ircfg)
    defuse = DiGraphDefUse(reaching_defs)
    _write_text(os.path.join(output_dir, '{}_defuse.dot'.format(function_name)), defuse.dot())

    # block loc_key + assignblk index identifies an instruction offset, so the
    # def-use graph can be mapped back to instructions through this table.
    LocKeyIdx2InstrOffset = {}
    for block in viewvalues(reaching_defs.ircfg.blocks):
        for index, assignblk in enumerate(block):
            LocKeyIdx2InstrOffset['{}_{}'.format(block.loc_key, index)] = hex(assignblk.instr.offset)

    _write_text(
        os.path.join(output_dir, '{}_LocKeyIdx2InstrOffset.map'.format(function_name)),
        '\n'.join('{}:{}'.format(key, value) for key, value in LocKeyIdx2InstrOffset.items()))


def ida_dataflow_analysis(function_addr:int, function_name:str, output_dir:str, defuse_only: bool = False) -> None:
    """Run the data-flow analysis on one function of the database open in IDA.

    Args:
        function_addr: Start address of the function.
        function_name: Name used in log messages and as output file prefix.
        output_dir: Directory receiving the output files.
        defuse_only: When true, skip the DFG and only emit the def-use graph
            (the ``.asm2ir`` and ``.map`` files are written either way).
    """
    loc_db = LocationDB()

    ###################### IDA specific #######################
    machine = guess_machine()
    bin_stream = bin_stream_ida()

    # Populate symbols with ida names
    import idautils
    for ad, name in idautils.Names():
        if name is None:
            continue
        loc_db.add_location(name, ad)

    ###################### Reverse-tool-independent ######################
    _analyze_function(machine, bin_stream, loc_db, function_addr, function_name, output_dir, defuse_only)


def miasm_dataflow_analysis(function_addr:int, function_name:str, output_dir:str, filepath:str, arch:str = "X86_64", defuse_only: bool = False) -> None:
    """Run the data-flow analysis on one function of the binary at ``filepath``.

    Args:
        function_addr: Start address of the function.
        function_name: Name used in log messages and as output file prefix.
        output_dir: Directory receiving the output files.
        filepath: Path of the binary to load through miasm's ``Container``.
        arch: miasm machine name; retried lower-cased if rejected as given.
        defuse_only: When true, skip the DFG and only emit the def-use graph.
    """
    # The location database must exist before the container is parsed.
    loc_db = LocationDB()
    # NOTE(review): assumes Container.from_stream consumes the stream while
    # constructing the container, so closing it afterwards is safe -- confirm.
    with open(filepath, 'rb') as fdesc:
        bin_stream = Container.from_stream(fdesc, loc_db).bin_stream

    try:
        machine = Machine(arch)
    except ValueError:
        # NOTE(review): miasm machine names appear to be lower-case (e.g.
        # "x86_64"), which the upper-case default would not match -- confirm.
        machine = Machine(arch.lower())

    _analyze_function(machine, bin_stream, loc_db, function_addr, function_name, output_dir, defuse_only)


# [patch text, preserved verbatim] diff --git a/testcase/test b/testcase/test new file mode 100755 index 0000000..bf45d6a Binary files /dev/null and b/testcase/test differ diff --git a/testcase/test-idapython.py b/testcase/test-idapython.py new file mode 100644 index 0000000..923e23f --- /dev/null +++ b/testcase/test-idapython.py @@ -0,0 +1,17 @@ +import idc +import idautils +import idaapi +import ida_pro +import ida_auto +ida_auto.auto_wait() + + +for func in idautils.Functions(): + + func_name = idc.get_func_name(func) + print(hex(func),':',func_name) +
+ + + +ida_pro.qexit(0) diff --git a/testcase/test-miasm.py b/testcase/test-miasm.py new file mode 100644 index 0000000..8c7f874 --- /dev/null +++ b/testcase/test-miasm.py @@ -0,0 +1,26 @@ +import idc +import idautils +import idaapi +import ida_pro +import ida_auto +ida_auto.auto_wait() + + +from miasm.analysis.binary import Container +from miasm.core.asmblock import log_asmblock, AsmCFG +from miasm.core.interval import interval +from miasm.analysis.machine import Machine +from miasm.analysis.data_flow import \ + DiGraphDefUse, ReachingDefinitions, load_from_int +from miasm.expression.simplifications import expr_simp +from miasm.analysis.ssa import SSADiGraph +from miasm.ir.ir import AssignBlock, IRBlock +from miasm.analysis.simplifier import IRCFGSimplifierCommon, IRCFGSimplifierSSA +from miasm.core.locationdb import LocationDB + +print("[+] miasm loading success.") + +ida_pro.qexit(0) + + + diff --git a/testcase/test.cpp b/testcase/test.cpp new file mode 100644 index 0000000..f5933c3 --- /dev/null +++ b/testcase/test.cpp @@ -0,0 +1,33 @@ +// C++ program to demonstrate +// accessing of data members + +#include <iostream> +using namespace std; +class Geeks +{ + // Access specifier + public: + + // Data Members + string geekname; + + // Member Functions() + void printname() + { + cout << "Geekname is: " << geekname; + } +}; + +int main() { + + // Declare an object of class geeks + Geeks obj1; + + // accessing data member + obj1.geekname = "Abhi"; + + // accessing member function + obj1.printname(); + return 0; +} + diff --git a/testcase/test.dump b/testcase/test.dump new file mode 100644 index 0000000..e86ef00 --- /dev/null +++ b/testcase/test.dump @@ -0,0 +1,289 @@ + +test: file format elf64-x86-64 + + +Disassembly of section .init: + +0000000000001000 <_init>: + 1000: 48 83 ec 08 sub $0x8,%rsp + 1004: 48 8b 05 dd 2f 00 00 mov 0x2fdd(%rip),%rax # 3fe8 <__gmon_start__> + 100b: 48 85 c0 test %rax,%rax + 100e: 74 02 je 1012 <_init+0x12> + 1010: ff d0 callq *%rax + 1012: 48 83 c4 
08 add $0x8,%rsp + 1016: c3 retq + +Disassembly of section .plt: + +0000000000001020 <.plt>: + 1020: ff 35 e2 2f 00 00 pushq 0x2fe2(%rip) # 4008 <_GLOBAL_OFFSET_TABLE_+0x8> + 1026: ff 25 e4 2f 00 00 jmpq *0x2fe4(%rip) # 4010 <_GLOBAL_OFFSET_TABLE_+0x10> + 102c: 0f 1f 40 00 nopl 0x0(%rax) + +0000000000001030 <_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED1Ev@plt>: + 1030: ff 25 e2 2f 00 00 jmpq *0x2fe2(%rip) # 4018 <_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED1Ev@GLIBCXX_3.4.21> + 1036: 68 00 00 00 00 pushq $0x0 + 103b: e9 e0 ff ff ff jmpq 1020 <.plt> + +0000000000001040 <__cxa_atexit@plt>: + 1040: ff 25 da 2f 00 00 jmpq *0x2fda(%rip) # 4020 <__cxa_atexit@GLIBC_2.2.5> + 1046: 68 01 00 00 00 pushq $0x1 + 104b: e9 d0 ff ff ff jmpq 1020 <.plt> + +0000000000001050 <_ZStlsIcSt11char_traitsIcESaIcEERSt13basic_ostreamIT_T0_ES7_RKNSt7__cxx1112basic_stringIS4_S5_T1_EE@plt>: + 1050: ff 25 d2 2f 00 00 jmpq *0x2fd2(%rip) # 4028 <_ZStlsIcSt11char_traitsIcESaIcEERSt13basic_ostreamIT_T0_ES7_RKNSt7__cxx1112basic_stringIS4_S5_T1_EE@GLIBCXX_3.4.21> + 1056: 68 02 00 00 00 pushq $0x2 + 105b: e9 c0 ff ff ff jmpq 1020 <.plt> + +0000000000001060 <_ZStlsISt11char_traitsIcEERSt13basic_ostreamIcT_ES5_PKc@plt>: + 1060: ff 25 ca 2f 00 00 jmpq *0x2fca(%rip) # 4030 <_ZStlsISt11char_traitsIcEERSt13basic_ostreamIcT_ES5_PKc@GLIBCXX_3.4> + 1066: 68 03 00 00 00 pushq $0x3 + 106b: e9 b0 ff ff ff jmpq 1020 <.plt> + +0000000000001070 <_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEaSEPKc@plt>: + 1070: ff 25 c2 2f 00 00 jmpq *0x2fc2(%rip) # 4038 <_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEaSEPKc@GLIBCXX_3.4.21> + 1076: 68 04 00 00 00 pushq $0x4 + 107b: e9 a0 ff ff ff jmpq 1020 <.plt> + +0000000000001080 <_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEC1Ev@plt>: + 1080: ff 25 ba 2f 00 00 jmpq *0x2fba(%rip) # 4040 <_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEC1Ev@GLIBCXX_3.4.21> + 1086: 68 05 00 00 00 pushq $0x5 + 108b: e9 90 ff ff ff jmpq 1020 <.plt> + 
+0000000000001090 <_ZNSt8ios_base4InitC1Ev@plt>: + 1090: ff 25 b2 2f 00 00 jmpq *0x2fb2(%rip) # 4048 <_ZNSt8ios_base4InitC1Ev@GLIBCXX_3.4> + 1096: 68 06 00 00 00 pushq $0x6 + 109b: e9 80 ff ff ff jmpq 1020 <.plt> + +00000000000010a0 <_Unwind_Resume@plt>: + 10a0: ff 25 aa 2f 00 00 jmpq *0x2faa(%rip) # 4050 <_Unwind_Resume@GCC_3.0> + 10a6: 68 07 00 00 00 pushq $0x7 + 10ab: e9 70 ff ff ff jmpq 1020 <.plt> + +Disassembly of section .plt.got: + +00000000000010b0 <__cxa_finalize@plt>: + 10b0: ff 25 1a 2f 00 00 jmpq *0x2f1a(%rip) # 3fd0 <__cxa_finalize@GLIBC_2.2.5> + 10b6: 66 90 xchg %ax,%ax + +Disassembly of section .text: + +00000000000010c0 <_start>: + 10c0: 31 ed xor %ebp,%ebp + 10c2: 49 89 d1 mov %rdx,%r9 + 10c5: 5e pop %rsi + 10c6: 48 89 e2 mov %rsp,%rdx + 10c9: 48 83 e4 f0 and $0xfffffffffffffff0,%rsp + 10cd: 50 push %rax + 10ce: 54 push %rsp + 10cf: 4c 8d 05 6a 02 00 00 lea 0x26a(%rip),%r8 # 1340 <__libc_csu_fini> + 10d6: 48 8d 0d 03 02 00 00 lea 0x203(%rip),%rcx # 12e0 <__libc_csu_init> + 10dd: 48 8d 3d c1 00 00 00 lea 0xc1(%rip),%rdi # 11a5
+ 10e4: ff 15 f6 2e 00 00 callq *0x2ef6(%rip) # 3fe0 <__libc_start_main@GLIBC_2.2.5> + 10ea: f4 hlt + 10eb: 0f 1f 44 00 00 nopl 0x0(%rax,%rax,1) + +00000000000010f0 : + 10f0: 48 8d 3d 79 2f 00 00 lea 0x2f79(%rip),%rdi # 4070 <__TMC_END__> + 10f7: 48 8d 05 72 2f 00 00 lea 0x2f72(%rip),%rax # 4070 <__TMC_END__> + 10fe: 48 39 f8 cmp %rdi,%rax + 1101: 74 15 je 1118 + 1103: 48 8b 05 ce 2e 00 00 mov 0x2ece(%rip),%rax # 3fd8 <_ITM_deregisterTMCloneTable> + 110a: 48 85 c0 test %rax,%rax + 110d: 74 09 je 1118 + 110f: ff e0 jmpq *%rax + 1111: 0f 1f 80 00 00 00 00 nopl 0x0(%rax) + 1118: c3 retq + 1119: 0f 1f 80 00 00 00 00 nopl 0x0(%rax) + +0000000000001120 : + 1120: 48 8d 3d 49 2f 00 00 lea 0x2f49(%rip),%rdi # 4070 <__TMC_END__> + 1127: 48 8d 35 42 2f 00 00 lea 0x2f42(%rip),%rsi # 4070 <__TMC_END__> + 112e: 48 29 fe sub %rdi,%rsi + 1131: 48 c1 fe 03 sar $0x3,%rsi + 1135: 48 89 f0 mov %rsi,%rax + 1138: 48 c1 e8 3f shr $0x3f,%rax + 113c: 48 01 c6 add %rax,%rsi + 113f: 48 d1 fe sar %rsi + 1142: 74 14 je 1158 + 1144: 48 8b 05 a5 2e 00 00 mov 0x2ea5(%rip),%rax # 3ff0 <_ITM_registerTMCloneTable> + 114b: 48 85 c0 test %rax,%rax + 114e: 74 08 je 1158 + 1150: ff e0 jmpq *%rax + 1152: 66 0f 1f 44 00 00 nopw 0x0(%rax,%rax,1) + 1158: c3 retq + 1159: 0f 1f 80 00 00 00 00 nopl 0x0(%rax) + +0000000000001160 <__do_global_dtors_aux>: + 1160: 80 3d 29 30 00 00 00 cmpb $0x0,0x3029(%rip) # 4190 + 1167: 75 2f jne 1198 <__do_global_dtors_aux+0x38> + 1169: 55 push %rbp + 116a: 48 83 3d 5e 2e 00 00 cmpq $0x0,0x2e5e(%rip) # 3fd0 <__cxa_finalize@GLIBC_2.2.5> + 1171: 00 + 1172: 48 89 e5 mov %rsp,%rbp + 1175: 74 0c je 1183 <__do_global_dtors_aux+0x23> + 1177: 48 8b 3d e2 2e 00 00 mov 0x2ee2(%rip),%rdi # 4060 <__dso_handle> + 117e: e8 2d ff ff ff callq 10b0 <__cxa_finalize@plt> + 1183: e8 68 ff ff ff callq 10f0 + 1188: c6 05 01 30 00 00 01 movb $0x1,0x3001(%rip) # 4190 + 118f: 5d pop %rbp + 1190: c3 retq + 1191: 0f 1f 80 00 00 00 00 nopl 0x0(%rax) + 1198: c3 retq + 1199: 0f 1f 80 00 00 00 00 nopl 
0x0(%rax) + +00000000000011a0 : + 11a0: e9 7b ff ff ff jmpq 1120 + +00000000000011a5
: + 11a5: 55 push %rbp + 11a6: 48 89 e5 mov %rsp,%rbp + 11a9: 53 push %rbx + 11aa: 48 83 ec 28 sub $0x28,%rsp + 11ae: 48 8d 45 d0 lea -0x30(%rbp),%rax + 11b2: 48 89 c7 mov %rax,%rdi + 11b5: e8 e8 00 00 00 callq 12a2 <_ZN5GeeksC1Ev> + 11ba: 48 8d 45 d0 lea -0x30(%rbp),%rax + 11be: 48 8d 35 f9 0e 00 00 lea 0xef9(%rip),%rsi # 20be <_ZNSt8__detailL19_S_invalid_state_idE+0x16> + 11c5: 48 89 c7 mov %rax,%rdi + 11c8: e8 a3 fe ff ff callq 1070 <_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEaSEPKc@plt> + 11cd: 48 8d 45 d0 lea -0x30(%rbp),%rax + 11d1: 48 89 c7 mov %rax,%rdi + 11d4: e8 95 00 00 00 callq 126e <_ZN5Geeks9printnameEv> + 11d9: bb 00 00 00 00 mov $0x0,%ebx + 11de: 48 8d 45 d0 lea -0x30(%rbp),%rax + 11e2: 48 89 c7 mov %rax,%rdi + 11e5: e8 d4 00 00 00 callq 12be <_ZN5GeeksD1Ev> + 11ea: 89 d8 mov %ebx,%eax + 11ec: eb 1a jmp 1208 + 11ee: 48 89 c3 mov %rax,%rbx + 11f1: 48 8d 45 d0 lea -0x30(%rbp),%rax + 11f5: 48 89 c7 mov %rax,%rdi + 11f8: e8 c1 00 00 00 callq 12be <_ZN5GeeksD1Ev> + 11fd: 48 89 d8 mov %rbx,%rax + 1200: 48 89 c7 mov %rax,%rdi + 1203: e8 98 fe ff ff callq 10a0 <_Unwind_Resume@plt> + 1208: 48 83 c4 28 add $0x28,%rsp + 120c: 5b pop %rbx + 120d: 5d pop %rbp + 120e: c3 retq + +000000000000120f <_Z41__static_initialization_and_destruction_0ii>: + 120f: 55 push %rbp + 1210: 48 89 e5 mov %rsp,%rbp + 1213: 48 83 ec 10 sub $0x10,%rsp + 1217: 89 7d fc mov %edi,-0x4(%rbp) + 121a: 89 75 f8 mov %esi,-0x8(%rbp) + 121d: 83 7d fc 01 cmpl $0x1,-0x4(%rbp) + 1221: 75 32 jne 1255 <_Z41__static_initialization_and_destruction_0ii+0x46> + 1223: 81 7d f8 ff ff 00 00 cmpl $0xffff,-0x8(%rbp) + 122a: 75 29 jne 1255 <_Z41__static_initialization_and_destruction_0ii+0x46> + 122c: 48 8d 3d 5e 2f 00 00 lea 0x2f5e(%rip),%rdi # 4191 <_ZStL8__ioinit> + 1233: e8 58 fe ff ff callq 1090 <_ZNSt8ios_base4InitC1Ev@plt> + 1238: 48 8d 15 21 2e 00 00 lea 0x2e21(%rip),%rdx # 4060 <__dso_handle> + 123f: 48 8d 35 4b 2f 00 00 lea 0x2f4b(%rip),%rsi # 4191 <_ZStL8__ioinit> + 1246: 48 8b 05 ab 2d 
00 00 mov 0x2dab(%rip),%rax # 3ff8 <_ZNSt8ios_base4InitD1Ev@GLIBCXX_3.4> + 124d: 48 89 c7 mov %rax,%rdi + 1250: e8 eb fd ff ff callq 1040 <__cxa_atexit@plt> + 1255: 90 nop + 1256: c9 leaveq + 1257: c3 retq + +0000000000001258 <_GLOBAL__sub_I_main>: + 1258: 55 push %rbp + 1259: 48 89 e5 mov %rsp,%rbp + 125c: be ff ff 00 00 mov $0xffff,%esi + 1261: bf 01 00 00 00 mov $0x1,%edi + 1266: e8 a4 ff ff ff callq 120f <_Z41__static_initialization_and_destruction_0ii> + 126b: 5d pop %rbp + 126c: c3 retq + 126d: 90 nop + +000000000000126e <_ZN5Geeks9printnameEv>: + 126e: 55 push %rbp + 126f: 48 89 e5 mov %rsp,%rbp + 1272: 48 83 ec 10 sub $0x10,%rsp + 1276: 48 89 7d f8 mov %rdi,-0x8(%rbp) + 127a: 48 8d 35 2f 0e 00 00 lea 0xe2f(%rip),%rsi # 20b0 <_ZNSt8__detailL19_S_invalid_state_idE+0x8> + 1281: 48 8d 3d f8 2d 00 00 lea 0x2df8(%rip),%rdi # 4080 <_ZSt4cout@@GLIBCXX_3.4> + 1288: e8 d3 fd ff ff callq 1060 <_ZStlsISt11char_traitsIcEERSt13basic_ostreamIcT_ES5_PKc@plt> + 128d: 48 89 c2 mov %rax,%rdx + 1290: 48 8b 45 f8 mov -0x8(%rbp),%rax + 1294: 48 89 c6 mov %rax,%rsi + 1297: 48 89 d7 mov %rdx,%rdi + 129a: e8 b1 fd ff ff callq 1050 <_ZStlsIcSt11char_traitsIcESaIcEERSt13basic_ostreamIT_T0_ES7_RKNSt7__cxx1112basic_stringIS4_S5_T1_EE@plt> + 129f: 90 nop + 12a0: c9 leaveq + 12a1: c3 retq + +00000000000012a2 <_ZN5GeeksC1Ev>: + 12a2: 55 push %rbp + 12a3: 48 89 e5 mov %rsp,%rbp + 12a6: 48 83 ec 10 sub $0x10,%rsp + 12aa: 48 89 7d f8 mov %rdi,-0x8(%rbp) + 12ae: 48 8b 45 f8 mov -0x8(%rbp),%rax + 12b2: 48 89 c7 mov %rax,%rdi + 12b5: e8 c6 fd ff ff callq 1080 <_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEC1Ev@plt> + 12ba: 90 nop + 12bb: c9 leaveq + 12bc: c3 retq + 12bd: 90 nop + +00000000000012be <_ZN5GeeksD1Ev>: + 12be: 55 push %rbp + 12bf: 48 89 e5 mov %rsp,%rbp + 12c2: 48 83 ec 10 sub $0x10,%rsp + 12c6: 48 89 7d f8 mov %rdi,-0x8(%rbp) + 12ca: 48 8b 45 f8 mov -0x8(%rbp),%rax + 12ce: 48 89 c7 mov %rax,%rdi + 12d1: e8 5a fd ff ff callq 1030 
'''
This file comes from the official miasm repo
    miasm/example/ida/utils.py
'''
from __future__ import print_function
from builtins import map
import idaapi
from idc import *

from miasm.analysis.machine import Machine
from miasm.ir.translators import Translator
import miasm.expression.expression as m2_expr


def guess_machine(addr=None):
    """Return an instance of Machine corresponding to the IDA guessed processor

    @addr: optional address. Only used for ARM, where the "T" segment
           register read at this address selects Thumb (T == 1) or ARM
           decoding. ARM is assumed when no address is given.

    Raises NotImplementedError when the processor name, or the ARM
    (thumb, size, endianness) combination, has no known miasm machine.
    """

    processor_name = get_inf_attr(INF_PROCNAME)
    info = idaapi.get_inf_structure()

    if info.is_64bit():
        size = 64
    elif info.is_32bit():
        size = 32
    else:
        size = None

    if processor_name == "metapc":
        size2machine = {
            64: "x86_64",
            32: "x86_32",
            None: "x86_16",
        }

        machine = Machine(size2machine[size])

    elif processor_name == "ARM":
        # TODO ARM/thumb
        # hack for thumb: set armt = True in globals :/
        # set bigendiant = True is bigendian
        # Thumb, size, endian
        info2machine = {
            (True, 32, True): "armtb",
            (True, 32, False): "armtl",
            (False, 32, True): "armb",
            (False, 32, False): "arml",
            (False, 64, True): "aarch64b",
            (False, 64, False): "aarch64l",
        }

        # Get T reg to detect arm/thumb function
        # Default is arm
        is_armt = False
        if addr is not None:
            t_reg = get_sreg(addr, "T")
            is_armt = t_reg == 1

        is_bigendian = info.is_be()
        infos = (is_armt, size, is_bigendian)
        if infos not in info2machine:
            raise NotImplementedError('not fully functional')
        machine = Machine(info2machine[infos])

        from miasm.analysis.disasm_cb import guess_funcs
        from miasm.analysis.disasm_cb import arm_guess_subcall, arm_guess_jump_table
        # guess_funcs is a module-level list shared by every caller: only
        # register each ARM callback once, otherwise every call to
        # guess_machine() would append (and later run) another duplicate.
        for callback in (arm_guess_subcall, arm_guess_jump_table):
            if callback not in guess_funcs:
                guess_funcs.append(callback)

    elif processor_name == "msp430":
        machine = Machine("msp430")
    elif processor_name == "mipsl":
        machine = Machine("mips32l")
    elif processor_name == "mipsb":
        machine = Machine("mips32b")
    elif processor_name == "PPC":
        machine = Machine("ppc32b")
    else:
        # Show the unsupported processor name before failing
        print(repr(processor_name))
        raise NotImplementedError('not fully functional')

    return machine


class TranslatorIDA(Translator):
    """Translate a Miasm expression to a IDA colored string"""

    # Implemented language
    __LANG__ = "ida_w_color"

    def __init__(self, loc_db=None, **kwargs):
        """@loc_db: optional LocationDB used to pretty-print ExprLoc keys"""
        super(TranslatorIDA, self).__init__(**kwargs)
        self.loc_db = loc_db

    def str_protected_child(self, child, parent):
        """Translate @child, wrapped in parentheses when
        should_parenthesize_child(child, parent) says so"""
        translated = self.from_expr(child)
        if m2_expr.should_parenthesize_child(child, parent):
            return "(%s)" % translated
        return translated

    def from_ExprInt(self, expr):
        return idaapi.COLSTR(str(expr), idaapi.SCOLOR_NUMBER)

    def from_ExprId(self, expr):
        return idaapi.COLSTR(str(expr), idaapi.SCOLOR_REG)

    def from_ExprLoc(self, expr):
        # Use the location database name when one was provided,
        # otherwise fall back on the raw string form of the expression
        if self.loc_db is not None:
            out = self.loc_db.pretty_str(expr.loc_key)
        else:
            out = str(expr)
        return idaapi.COLSTR(out, idaapi.SCOLOR_REG)

    def from_ExprMem(self, expr):
        ptr = self.from_expr(expr.ptr)
        size = idaapi.COLSTR('@' + str(expr.size), idaapi.SCOLOR_RPTCMT)
        return '%s[%s]' % (size, ptr)

    def from_ExprSlice(self, expr):
        base = self.from_expr(expr.arg)
        start = idaapi.COLSTR(str(expr.start), idaapi.SCOLOR_RPTCMT)
        stop = idaapi.COLSTR(str(expr.stop), idaapi.SCOLOR_RPTCMT)
        return "(%s)[%s:%s]" % (base, start, stop)

    def from_ExprCompose(self, expr):
        # Each part is rendered as "<sub expression>, <start bit>, <stop bit>"
        parts = [
            "%s, %s, %s" % (
                self.from_expr(subexpr),
                idaapi.COLSTR(str(idx), idaapi.SCOLOR_RPTCMT),
                idaapi.COLSTR(str(idx + subexpr.size), idaapi.SCOLOR_RPTCMT),
            )
            for idx, subexpr in expr.iter_args()
        ]
        return "{" + ", ".join(parts) + "}"

    def from_ExprCond(self, expr):
        cond = self.str_protected_child(expr.cond, expr)
        src1 = self.from_expr(expr.src1)
        src2 = self.from_expr(expr.src2)
        return "%s?(%s,%s)" % (cond, src1, src2)

    def from_ExprOp(self, expr):
        if expr._op == '-':  # Unary minus
            return '-' + self.str_protected_child(expr._args[0], expr)
        if expr.is_associative() or expr.is_infix():
            return (' ' + expr._op + ' ').join(
                [self.str_protected_child(arg, expr) for arg in expr._args]
            )
        # Generic operator: function-call like notation
        return (expr._op + '(' +
                ', '.join(self.from_expr(arg) for arg in expr._args) +
                ')')

    def from_ExprAssign(self, expr):
        # from_expr belongs to the translator, not to the expression:
        # dispatch through self, as every other handler of this class does
        return "%s = %s" % tuple(map(self.from_expr, (expr.dst, expr.src)))


def expr2colorstr(expr, loc_db):
    """Colorize an Expr instance for IDA
    @expr: Expr instance to colorize
    @loc_db: LocationDB instance
    """

    translator = TranslatorIDA(loc_db=loc_db)
    return translator.from_expr(expr)


class translatorForm(idaapi.Form):
    """Translator Form.
    Offer a ComboBox with available languages (ie. IR translators) and the
    corresponding translation."""

    # The result text area is fixed-font and read-only
    flags = (idaapi.Form.MultiLineTextControl.TXTF_FIXEDFONT |
             idaapi.Form.MultiLineTextControl.TXTF_READONLY)

    def __init__(self, expr):
        "@expr: Expr instance"

        # Init
        self.languages = list(Translator.available_languages())
        self.expr = expr

        # Initial translation
        text = Translator.to_language(self.languages[0]).from_expr(self.expr)

        # Create the Form
        # Every control declared in the dictionary below needs a
        # "<label:{name}>" placeholder in the template to be part of the
        # form: 'cbLanguage' and 'result' are bound here.
        idaapi.Form.__init__(self, r"""STARTITEM 0
Python Expression
{FormChangeCb}
<Language:{cbLanguage}>
<Translation:{result}>
""", {
            'result': idaapi.Form.MultiLineTextControl(text=text,
                                                       flags=translatorForm.flags),
            'cbLanguage': idaapi.Form.DropdownListControl(
                items=self.languages,
                readonly=True,
                selval=0),
            'FormChangeCb': idaapi.Form.FormChangeCb(self.OnFormChange),
        })

    def OnFormChange(self, fid):
        """Form callback: re-translate the expression when the language
        dropdown changes.

        Returns -1 when the translation raised (the result field is hidden),
        1 otherwise.
        """
        if fid == self.cbLanguage.id:
            # Display the Field (may be hide)
            self.ShowField(self.result, True)

            # Translate the expression
            dest_lang = self.languages[self.GetControlValue(self.cbLanguage)]
            try:
                text = Translator.to_language(dest_lang).from_expr(self.expr)
            except Exception:
                # Best effort: on any translation failure, hide the result
                # field instead of raising from the form callback
                self.ShowField(self.result, False)
                return -1

            # Update the form
            self.SetControlValue(self.result,
                                 idaapi.textctrl_info_t(text=str(text),
                                                        flags=translatorForm.flags))
        return 1