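No substantial logic changes; this is mostly renaming and moving blocks of code around. The x86-specific instruction tables and memory-operand regexes gain an `_x86_` prefix and move to module level, the instruction_is_*() predicate methods become attributes computed once in x86_get_instruction() (instruction_is_writing becomes instruction_is_store, instruction_is_jumping becomes instruction_is_branch), and get_instruction() now picks the per-architecture routine based on gdb's "show architecture" output.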
v2: Python style fixes
Signed-off-by: Denys Vlasenko dvlasenk@redhat.com
---
 src/plugins/abrt-gdb-exploitable | 228 ++++++++++++++++++++-------------------
 1 file changed, 117 insertions(+), 111 deletions(-)
diff --git a/src/plugins/abrt-gdb-exploitable b/src/plugins/abrt-gdb-exploitable index e3d637b..f33ec45 100755 --- a/src/plugins/abrt-gdb-exploitable +++ b/src/plugins/abrt-gdb-exploitable @@ -30,11 +30,10 @@ def init_gettext(): gettext.bindtextdomain(GETTEXT_PROGNAME, '/usr/share/locale') gettext.textdomain(GETTEXT_PROGNAME)
- _WRITES_ALWAYS = -1 _WRITES_IF_MEMREF = -2
-_writing_instr = { +_x86_writing_instr = { # insn:N, where N: # -1: this insn always writes to memory # -2: writes to memory if any operand is a memory operand @@ -198,7 +197,7 @@ _writing_instr = { # Note: stack-writing instructions are omitted }
-_pushing_instr = ( +_x86_pushing_instr = ( "push", "pusha", "pushf", @@ -207,9 +206,9 @@ _pushing_instr = ( "lcall" )
-_intdiv_instr = ("div", "idiv") +_x86_intdiv_instr = ("div", "idiv")
-_jumping_instr = ( +_x86_jumping_instr = ( "jmp", # indirect jumps/calls with garbage data "call", # call: also possible that stack is exhausted (infinite recursion) "ljmp", @@ -233,7 +232,10 @@ _jumping_instr = ( )
# stack was smashed if we crash on one of these -_return_instr = ("iret", "lret", "ret") +_x86_return_instr = ("iret", "lret", "ret") + +_x86_mem_op1_regex = re.compile("^((-?0x)|[(])") +_x86_mem_op2_regex = re.compile("[,:]((-?0x)|[(])")
def _fetch_insn_from_table(ins, table): if not ins: @@ -251,50 +253,68 @@ def _fetch_insn_from_table(ins, table): return ins return None
+ class SignalAndInsn: - def get_signal(self): - self.signo = None - try: - # Requires new kernels which record complete siginfo - # in coredumps (Linux 3.9 still don't have it), - # and new gdb: - sig = gdb.parse_and_eval("$_siginfo.si_signo") - # Requires patched gdb: - #sig = gdb.parse_and_eval("$_signo") - # - # type(sig) = <type 'gdb.Value'>, convert to plain int: - self.signo = int(sig) - except gdb.error: - # "Python Exception <class 'gdb.error'> - # Attempt to extract a component of a value that is not a structure" - # Possible reasons why $_siginfo doesn't exist: - # program is still running, program exited normally, - # we work with a coredump from an old kernel. - # - # Lets see whether we are running from the abrt and it - # provided us with signal number. Horrible hack :( - # - try: - self.signo = int(os.environ["SIGNO_OF_THE_COREDUMP"]) - except KeyError: + + def x86_instruction_is_store(self): + operand = _fetch_insn_from_table(self.mnemonic, _x86_writing_instr) + if not operand: + if not self.mnemonic: return False - return True + # There are far too many SSE store instructions, + # don't want to pollute the table with them. + # Special-case the check for MOVxxx + # and its SIMD cousins VMOVxxx: + if self.mnemonic[:3] != "mov" and self.mnemonic[:4] != "vmov": + return False + operand = 2
- def get_instruction(self): - self.current_instruction = None - self.mnemonic = None - self.operands = "" + if operand == _WRITES_ALWAYS: # no need to check operands, it's a write + return True + + # Memory operands look like this: [%seg:][[-]0xHEXNUM][(%reg[,...])] + # Careful with immediate operands which are $0xHEXNUM + # and FPU register references which are st(N). + if _x86_mem_op1_regex.search(self.operands): + mem_op_pos = 0 + else: + match = _x86_mem_op2_regex.search(self.operands) + if not match: + return False # no memory operands + mem_op_pos = match.start() + 1 + + if operand == _WRITES_IF_MEMREF: # any mem operand indicates write + return True + + comma = self.operands.find(",") + if mem_op_pos < comma: + # "%cs:0x0(%rax,%rax,1),foo" - 1st operand is memory + # "%cs:0x0(%rax),foo" - 1st operand is memory + memory_operand = 1 + elif comma < 0: + # "%cs:0x0(%rax)" - 1st operand is memory + memory_operand = 1 + else: + # mem_op_pos is after comma + # "foo,%cs:0x0(%rax,%rax,1)" - 2nd operand is memory + # (It also can be a third, fourth etc operand) + memory_operand = 2 + + if operand == memory_operand: + return True + return False + + def x86_get_instruction(self): try: # just "disassemble $pc" won't work if $pc doesn't point # inside a known function - instructions = gdb.execute("disassemble $pc,$pc+32", to_string=True) + raw_instructions = gdb.execute("disassemble $pc,$pc+32", to_string=True) except gdb.error: # For example, if tracee already exited normally. # Another observed case is if $pc points to unmapped area. # We get "Python Exception <class 'gdb.error'> No registers" return
- raw_instructions = instructions instructions = [] current = None for line in raw_instructions.split("\n"): @@ -316,7 +336,7 @@ class SignalAndInsn: if line: instructions.append(line) if current == None: - # not False! we determined that $pc points to a bad address, + # we determined that $pc points to a bad address, # which is an interesting fact. return
@@ -344,76 +364,62 @@ class SignalAndInsn: self.operands = t[1] break
- mem_op1_regex = re.compile("^((-?0x)|[(])") - mem_op2_regex = re.compile("[,:]((-?0x)|[(])") + self.instruction_is_pushing = (_fetch_insn_from_table(self.mnemonic, _x86_pushing_instr) is not None) + self.instruction_is_division = (_fetch_insn_from_table(self.mnemonic, _x86_intdiv_instr) is not None) + self.instruction_is_branch = (_fetch_insn_from_table(self.mnemonic, _x86_jumping_instr) is not None) + self.instruction_is_return = (_fetch_insn_from_table(self.mnemonic, _x86_return_instr) is not None) + self.instruction_is_store = self.x86_instruction_is_store()
- def instruction_is_writing(self): - operand = _fetch_insn_from_table(self.mnemonic, _writing_instr) - if not operand: - if not self.mnemonic: - return False - # There are far too many SSE store instructions, - # don't want to pollute the table with them. - # Special-case the check for MOVxxx - # and its SIMD cousins VMOVxxx: - if self.mnemonic[:3] != "mov" and self.mnemonic[:4] != "vmov": - return False - operand = 2
- if operand == _WRITES_ALWAYS: # no need to check operands, it's a write - return True - - # Memory operands look like this: [%seg:][[-]0xHEXNUM][(%reg[,...])] - # Careful with immediate operands which are $0xHEXNUM - # and FPU register references which are st(N). - if SignalAndInsn.mem_op1_regex.search(self.operands): - mem_op_pos = 0 - else: - match = SignalAndInsn.mem_op2_regex.search(self.operands) - if not match: - return False # no memory operands - mem_op_pos = match.start() + 1 - - if operand == _WRITES_IF_MEMREF: # any mem operand indicates write - return True - - comma = self.operands.find(",") - if mem_op_pos < comma: - # "%cs:0x0(%rax,%rax,1),foo" - 1st operand is memory - # "%cs:0x0(%rax),foo" - 1st operand is memory - memory_operand = 1 - elif comma < 0: - # "%cs:0x0(%rax)" - 1st operand is memory - memory_operand = 1 - else: - # mem_op_pos is after comma - # "foo,%cs:0x0(%rax,%rax,1)" - 2nd operand is memory - # (It also can be a third, fourth etc operand) - memory_operand = 2 - - if operand == memory_operand: - return True - return False - - def instruction_is_pushing(self): - if _fetch_insn_from_table(self.mnemonic, _pushing_instr): - return True - return False - - def instruction_is_division(self): - if _fetch_insn_from_table(self.mnemonic, _intdiv_instr): - return True - return False - - def instruction_is_jumping(self): - if _fetch_insn_from_table(self.mnemonic, _jumping_instr): - return True - return False + def get_instruction(self): + self.current_instruction = None + self.mnemonic = None + self.operands = "" + self.instruction_is_division = None + self.instruction_is_store = None + self.instruction_is_pushing = None + self.instruction_is_return = None + self.instruction_is_branch = None + try: + arch = gdb.execute("show architecture", to_string=True) + # Examples of the string we get: + # The target architecture is set automatically (currently i386) + # The target architecture is set automatically (currently i386:x86-64) + # The target 
architecture is set automatically (currently powerpc:common64) + if " i386" in arch: + return self.x86_get_instruction() + #if " powerpc" in arch: + # return self.ppc_get_instruction() + except gdb.error: + return
- def instruction_is_return(self): - if _fetch_insn_from_table(self.mnemonic, _return_instr): - return True - return False + def get_signal(self): + self.signo = None + try: + # Requires new kernels which record complete siginfo + # in coredumps (Linux 3.9 still don't have it), + # and new gdb: + sig = gdb.parse_and_eval("$_siginfo.si_signo") + # Requires patched gdb: + #sig = gdb.parse_and_eval("$_signo") + # + # type(sig) = <type 'gdb.Value'>, convert to plain int: + self.signo = int(sig) + except gdb.error: + # "Python Exception <class 'gdb.error'> + # Attempt to extract a component of a value that is not a structure" + # Possible reasons why $_siginfo doesn't exist: + # program is still running, program exited normally, + # we work with a coredump from an old kernel. + # + # Lets see whether we are running from the abrt and it + # provided us with signal number. Horrible hack :( + # + try: + self.signo = int(os.environ["SIGNO_OF_THE_COREDUMP"]) + except KeyError: + return False + return True
#Our initial set of testing will use the list Apple included in their #CrashWrangler announcement: @@ -467,7 +473,7 @@ class SignalAndInsn: elif self.signo == signal.SIGFPE: self.exploitable_rating = 1 self.exploitable_desc = _("Arithmetic exception") - if self.instruction_is_division(): + if self.instruction_is_division: self.exploitable_rating = 0 self.exploitable_desc = _("Division by zero") elif self.signo == signal.SIGILL: @@ -476,18 +482,18 @@ class SignalAndInsn:
# TODO: check that sig is SIGSEGV/SIGBUS?
- elif self.instruction_is_pushing(): + elif self.instruction_is_pushing: self.exploitable_rating = 4 self.exploitable_desc = _("Stack overflow") - elif self.instruction_is_writing(): + elif self.instruction_is_store: self.exploitable_rating = 6 self.exploitable_desc = _("Write to an invalid address") - elif self.instruction_is_return(): + elif self.instruction_is_return: self.exploitable_rating = 7 self.exploitable_desc = _("Subroutine return to an invalid address (corrupted stack?)") # Note: we check "ret" first, _then_ jumps. # Corrupted stack is different from corrupted data. - elif self.instruction_is_jumping(): + elif self.instruction_is_branch: self.exploitable_rating = 6 self.exploitable_desc = _("Jump to an invalid address") elif not self.current_instruction:
v2: Python style fixes
Signed-off-by: Denys Vlasenko dvlasenk@redhat.com
---
 src/plugins/abrt-gdb-exploitable | 60 ++++++++++++++++++++++++++++++++++++++--
 tests/abrt-exploitable/Makefile  |  4 +--
 2 files changed, 60 insertions(+), 4 deletions(-)
diff --git a/src/plugins/abrt-gdb-exploitable b/src/plugins/abrt-gdb-exploitable index f33ec45..1f33061 100755 --- a/src/plugins/abrt-gdb-exploitable +++ b/src/plugins/abrt-gdb-exploitable @@ -371,6 +371,62 @@ class SignalAndInsn: self.instruction_is_store = self.x86_instruction_is_store()
+ def ppc_get_instruction(self): + try: + # just "disassemble $pc" won't work if $pc doesn't point + # inside a known function + raw_instructions = gdb.execute("disassemble $pc,$pc+32", to_string=True) + except gdb.error: + # For example, if tracee already exited normally. + # Another observed case is if $pc points to unmapped area. + # We get "Python Exception <class 'gdb.error'> No registers" + return + + instructions = [] + current = None + for line in raw_instructions.split("\n"): + # line can be: + # "Dump of assembler code from 0xAAAA to 0xBBBB:" + # "[=>] 0x00000000004004dc[ <+0>]: push %rbp" + # (" <+0>" part is present when we run on a live process, + # on coredump it is absent) + # "End of assembler dump." + # "" (empty line) + if line.startswith("=>"): + line = line[2:] + current = len(instructions) + line = line.split(":", 1) + if len(line) < 2: # no ":"? + continue + line = line[1] # drop "foo:" + line = line.strip() # drop leading/trailing whitespace + if line: + instructions.append(line) + if current is None: + # we determined that $pc points to a bad address, + # which is an interesting fact. + return + + # There can be a disasm comment: "insn op,op,op # comment"; + # strip it, and whitespace on both ends: + t = instructions[current].split("#", 1)[0].strip() + self.current_instruction = t + # Split it into mnemonic and operands + t = t.split(None, 1) + self.mnemonic = t[0] + if len(t) > 1: + self.operands = t[1] + + self.instruction_is_store = self.mnemonic.startswith("st") + self.instruction_is_branch = self.mnemonic.startswith("b") + self.instruction_is_pushing = (self.instruction_is_store and "(r1)" in self.operands) + # Looks like div[o] insns on ppc don't cause exceptions + # (need to check whether, and how, FPE is generated) + #self.instruction_is_division = + # On ppc, return insn is b[cond]lr. TODO: is cond form ever used by gcc? 
+ self.instruction_is_return = (self.mnemonic == "blr") + + def get_instruction(self): self.current_instruction = None self.mnemonic = None @@ -388,8 +444,8 @@ class SignalAndInsn: # The target architecture is set automatically (currently powerpc:common64) if " i386" in arch: return self.x86_get_instruction() - #if " powerpc" in arch: - # return self.ppc_get_instruction() + if " powerpc" in arch: + return self.ppc_get_instruction() except gdb.error: return
diff --git a/tests/abrt-exploitable/Makefile b/tests/abrt-exploitable/Makefile index 6672fe3..9df463c 100644 --- a/tests/abrt-exploitable/Makefile +++ b/tests/abrt-exploitable/Makefile @@ -34,7 +34,7 @@ all: $(TESTS) clean: rm -f $(TESTS)
-testlive: +testlive: all for t in $(TESTS); do \ echo "====="; \ echo "Test: $$t"; \ @@ -48,7 +48,7 @@ testlive: ./$$t; \ done 2>&1 | tee testlive.log
-testcore: +testcore: all rm ./core* 2>/dev/null; \ ulimit -c unlimited; \ for t in $(TESTS); do \
crash-catcher@lists.fedorahosted.org