11.fishhook原理

参考:

GitHub-Fishhook

为源码添加注释

源码代码量不大,建议直接对照源码查看。

// Copyright (c) 2013, Facebook, Inc.
// All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//   * Redistributions of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//   * Redistributions in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//   * Neither the name Facebook nor the names of its contributors may be used to
//     endorse or promote products derived from this software without specific
//     prior written permission.
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#include "fishhook.h"

#include <dlfcn.h>
#include <stdbool.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/types.h>
#include <mach/mach.h>
#include <mach/vm_map.h>
#include <mach/vm_region.h>
#include <mach-o/dyld.h>
#include <mach-o/loader.h>
#include <mach-o/nlist.h>

// Pick the 32- or 64-bit variants of the Mach-O structures once, keyed on
// __LP64__, so the rest of the file can use a single set of type names and a
// single segment load-command constant.
#ifdef __LP64__
typedef struct mach_header_64 mach_header_t;
typedef struct segment_command_64 segment_command_t;
typedef struct section_64 section_t;
typedef struct nlist_64 nlist_t;
#define LC_SEGMENT_ARCH_DEPENDENT LC_SEGMENT_64
#else
typedef struct mach_header mach_header_t;
typedef struct segment_command segment_command_t;
typedef struct section section_t;
typedef struct nlist nlist_t;
#define LC_SEGMENT_ARCH_DEPENDENT LC_SEGMENT
#endif

// Name of the __DATA_CONST segment; defined here only when the included
// headers have not already provided it.
#ifndef SEG_DATA_CONST
#define SEG_DATA_CONST  "__DATA_CONST"
#endif

// One batch of rebindings, stored as a node of a singly linked list.
// prepend_rebindings() creates one node per call and pushes it at the head.
struct rebindings_entry {
  struct rebinding *rebindings;   // heap-allocated private copy of the caller's array
  size_t rebindings_nel;          // number of elements in `rebindings`
  struct rebindings_entry *next;  // previously registered batch, or NULL
};

// Head of the process-wide list used by rebind_symbols(); the newest batch
// comes first. Starts out NULL (static storage).
static struct rebindings_entry *_rebindings_head;

// Copies `nel` rebindings into a freshly allocated list node and pushes that
// node onto the front of the list rooted at *rebindings_head.
// Returns 0 on success, or -1 if either allocation fails; on failure nothing
// is leaked and *rebindings_head is left untouched.
static int prepend_rebindings(struct rebindings_entry **rebindings_head,
                              struct rebinding rebindings[],
                              size_t nel) {
  const size_t table_bytes = sizeof(struct rebinding) * nel;

  struct rebindings_entry *node = malloc(sizeof *node);
  if (node == NULL) {
    return -1;
  }

  struct rebinding *table = malloc(table_bytes);
  if (table == NULL) {
    free(node);
    return -1;
  }

  // Keep a private copy so the caller's array does not have to outlive the call.
  memcpy(table, rebindings, table_bytes);

  node->rebindings = table;
  node->rebindings_nel = nel;
  node->next = *rebindings_head;
  *rebindings_head = node;
  return 0;
}

// Queries the current VM protection of the memory region that contains
// `sectionStart` in this task. Falls back to VM_PROT_READ when the region
// lookup does not succeed.
static vm_prot_t get_protection(void *sectionStart) {
  vm_address_t region_addr = (vm_address_t)sectionStart;
  vm_size_t region_size = 0;
  memory_object_name_t object_name;
  kern_return_t status;
#if __LP64__
  vm_region_basic_info_data_64_t region_info;
  mach_msg_type_number_t info_count = VM_REGION_BASIC_INFO_COUNT_64;
  status = vm_region_64(mach_task_self(), &region_addr, &region_size,
                        VM_REGION_BASIC_INFO_64,
                        (vm_region_info_64_t)&region_info, &info_count,
                        &object_name);
#else
  vm_region_basic_info_data_t region_info;
  mach_msg_type_number_t info_count = VM_REGION_BASIC_INFO_COUNT;
  status = vm_region(mach_task_self(), &region_addr, &region_size,
                     VM_REGION_BASIC_INFO,
                     (vm_region_info_t)&region_info, &info_count,
                     &object_name);
#endif
  return (status == KERN_SUCCESS) ? region_info.protection : VM_PROT_READ;
}


/// Final step: patch one symbol-pointer section so that every entry whose
/// symbol name matches a registered rebinding points at the replacement.
/// @param rebindings head of the linked list of rebinding batches
/// @param section the lazy or non-lazy symbol-pointer section to patch
/// @param slide ASLR slide of the image that owns `section`
/// @param symtab symbol table of the image
/// @param strtab string table of the image
/// @param indirect_symtab indirect symbol table of the image
static void perform_rebinding_with_section(struct rebindings_entry *rebindings,
                                           section_t *section,
                                           intptr_t slide,
                                           nlist_t *symtab,
                                           char *strtab,
                                           uint32_t *indirect_symtab) {
  const bool isDataConst = strcmp(section->segname, SEG_DATA_CONST) == 0;

  // For symbol-pointer sections, reserved1 is the index of the section's first
  // entry inside the indirect symbol table.
  uint32_t *indirect_symbol_indices = indirect_symtab + section->reserved1;

  // slide + section->addr is the in-memory array of function pointers that
  // this section holds; entry i corresponds to indirect_symbol_indices[i].
  void **indirect_symbol_bindings = (void **)((uintptr_t)slide + section->addr);

  vm_prot_t oldProtection = VM_PROT_READ;
  bool madeWritable = false;
  if (isDataConst) {
    // Query the protection of the section itself (not of the rebindings list),
    // so that the value restored at the end is the section's real protection.
    oldProtection = get_protection(indirect_symbol_bindings);
    if (mprotect(indirect_symbol_bindings, section->size, PROT_READ | PROT_WRITE) == 0) {
      madeWritable = true;
    } else if (!(oldProtection & VM_PROT_WRITE)) {
      // The section is not writable and could not be made writable; storing
      // into it would fault, so leave this section unpatched.
      return;
    }
  }

  // Walk every pointer slot of the section.
  for (uint i = 0; i < section->size / sizeof(void *); i++) {
    // Index of this slot's symbol in the symbol table, read from the
    // indirect symbol table.
    uint32_t symtab_index = indirect_symbol_indices[i];
    if (symtab_index == INDIRECT_SYMBOL_ABS || symtab_index == INDIRECT_SYMBOL_LOCAL ||
        symtab_index == (INDIRECT_SYMBOL_LOCAL   | INDIRECT_SYMBOL_ABS)) {
      continue;
    }
    // Symbol table entry -> offset into the string table -> symbol name.
    uint32_t strtab_offset = symtab[symtab_index].n_un.n_strx;
    char *symbol_name = strtab + strtab_offset;
    // The comparison below skips the first character of the symbol name
    // (the leading "_"), so the name must have at least two characters.
    bool symbol_name_longer_than_1 = symbol_name[0] && symbol_name[1];

    // Search every registered batch, newest first, for a matching name.
    struct rebindings_entry *cur = rebindings;
    while (cur) {
      for (uint j = 0; j < cur->rebindings_nel; j++) {
        if (symbol_name_longer_than_1 &&
            strcmp(&symbol_name[1], cur->rebindings[j].name) == 0) {
          // Hand the current target back through `replaced`, unless the slot
          // already holds the replacement (that would overwrite the saved
          // original with the hook itself).
          if (cur->rebindings[j].replaced != NULL &&
              indirect_symbol_bindings[i] != cur->rebindings[j].replacement) {
            *(cur->rebindings[j].replaced) = indirect_symbol_bindings[i];
          }
          // Redirect the slot to the replacement function.
          indirect_symbol_bindings[i] = cur->rebindings[j].replacement;
          // First match wins; move on to the next slot.
          goto symbol_loop;
        }
      }
      cur = cur->next;
    }
  symbol_loop:;
  }

  if (madeWritable) {
    // Translate the saved Mach protection bits back to mprotect() flags and
    // restore them.
    int protection = 0;
    if (oldProtection & VM_PROT_READ) {
      protection |= PROT_READ;
    }
    if (oldProtection & VM_PROT_WRITE) {
      protection |= PROT_WRITE;
    }
    if (oldProtection & VM_PROT_EXECUTE) {
      protection |= PROT_EXEC;
    }
    mprotect(indirect_symbol_bindings, section->size, protection);
  }
}


/// 绑定的核心函数
/// @param rebindings 链表头
/// @param header image的头
/// @param slide ASLR
static void rebind_symbols_for_image(struct rebindings_entry *rebindings,
                                     const struct mach_header *header,
                                     intptr_t slide) {
    /**
     dladdr() 这个函数就是在程序里面找header

     可确定指定的address,是否位于构成进程的地址空间的其中一个加载模块(可执行文件或共享库)内。

     如果某个地址,位于在其上面映射加载模块的基址,和为该加载模块映射的最高虚拟地址之间(包括两端),则认为改地址在加载模块的范围内。

     如果某个加载模块复合这个条件,则会搜索其动态符号表,以查找指定的address最接近的符号。
     最接近符号是指其值等于,或最为接近但小于指定address的符号

     如果指定的address不再其中一个模块的加载范围内的话,返回0,且不修改Dl_info结构的内容。否则将返回一个非零值,同时设置Dl_info结构的字段。

     如果在包含address的加载模块内,找不到其值小于或等于address的符号,则dlisname、dli_saddr和dli_size字段将设置为0,dli_bind字段设置为STB_LOCAL,dli_typs字段设置为STT_NOTYPE。

     */
  Dl_info info;
  if (dladdr(header, &info) == 0) {
    return;
  }
// 准备从macho里面去找
  segment_command_t *cur_seg_cmd;
  segment_command_t *linkedit_segment = NULL;
  struct symtab_command* symtab_cmd = NULL;
  struct dysymtab_command* dysymtab_cmd = NULL;
// 跳过header的大小,找到loadCommand
  uintptr_t cur = (uintptr_t)header + sizeof(mach_header_t);
  for (uint i = 0; i < header->ncmds; i++, cur += cur_seg_cmd->cmdsize) {
    cur_seg_cmd = (segment_command_t *)cur;
    if (cur_seg_cmd->cmd == LC_SEGMENT_ARCH_DEPENDENT) {
      if (strcmp(cur_seg_cmd->segname, SEG_LINKEDIT) == 0) {
        linkedit_segment = cur_seg_cmd;
      }
    } else if (cur_seg_cmd->cmd == LC_SYMTAB) {
      symtab_cmd = (struct symtab_command*)cur_seg_cmd;
    } else if (cur_seg_cmd->cmd == LC_DYSYMTAB) {
      dysymtab_cmd = (struct dysymtab_command*)cur_seg_cmd;
    }
  }
// 如果刚才获取的,有一项为空就直接返回
  if (!symtab_cmd || !dysymtab_cmd || !linkedit_segment ||
      !dysymtab_cmd->nindirectsyms) {
    return;
  }

    // 链接时程序的基址 = __LINKEDEDIT.VM_Address - __LINKEDIT.File_Offset + silde
  // Find base symbol/string table addresses
  uintptr_t linkedit_base = (uintptr_t)slide + linkedit_segment->vmaddr - linkedit_segment->fileoff;
  nlist_t *symtab = (nlist_t *)(linkedit_base + symtab_cmd->symoff);
  char *strtab = (char *)(linkedit_base + symtab_cmd->stroff);

    // 间接(动态)符号表的地址 = 基址 + 符号表偏移量
  // Get indirect symbol table (array of uint32_t indices into symbol table)
  uint32_t *indirect_symtab = (uint32_t *)(linkedit_base + dysymtab_cmd->indirectsymoff);

  cur = (uintptr_t)header + sizeof(mach_header_t);
  for (uint i = 0; i < header->ncmds; i++, cur += cur_seg_cmd->cmdsize) {
    cur_seg_cmd = (segment_command_t *)cur;
    if (cur_seg_cmd->cmd == LC_SEGMENT_ARCH_DEPENDENT) {
        // 找到Data段
      if (strcmp(cur_seg_cmd->segname, SEG_DATA) != 0 &&
          strcmp(cur_seg_cmd->segname, SEG_DATA_CONST) != 0) {
        continue;
      }
      for (uint j = 0; j < cur_seg_cmd->nsects; j++) {
        section_t *sect =
          (section_t *)(cur + sizeof(segment_command_t)) + j;
          // 找懒加载表
        if ((sect->flags & SECTION_TYPE) == S_LAZY_SYMBOL_POINTERS) {
          perform_rebinding_with_section(rebindings, sect, slide, symtab, strtab, indirect_symtab);
        }
          // 非懒加载表
        if ((sect->flags & SECTION_TYPE) == S_NON_LAZY_SYMBOL_POINTERS) {
          perform_rebinding_with_section(rebindings, sect, slide, symtab, strtab, indirect_symtab);
        }
      }
    }
  }
}

// Two-argument adapter around rebind_symbols_for_image() that always applies
// the process-wide list (_rebindings_head); rebind_symbols() registers it as
// the dyld add-image callback and also calls it directly.
static void _rebind_symbols_for_image(const struct mach_header *header,
                                      intptr_t slide) {
  struct rebindings_entry *registered = _rebindings_head;
  rebind_symbols_for_image(registered, header, slide);
}

// Rebinds symbols in one caller-specified image only. The rebindings are kept
// in a temporary list that is freed before returning, so nothing is added to
// the process-wide list and no dyld callback is registered.
// Returns 0 on success, or -1 if the temporary list could not be allocated
// (in which case the image is not touched).
int rebind_symbols_image(void *header,
                         intptr_t slide,
                         struct rebinding rebindings[],
                         size_t rebindings_nel) {
    struct rebindings_entry *rebindings_head = NULL;
    int retval = prepend_rebindings(&rebindings_head, rebindings, rebindings_nel);
    if (retval < 0) {
      // prepend_rebindings() cleans up after itself and leaves the head NULL,
      // so there is nothing to rebind and nothing to free.
      return retval;
    }
    // Unlike rebind_symbols(), only this one image is processed.
    rebind_symbols_for_image(rebindings_head, (const struct mach_header *) header, slide);
    free(rebindings_head->rebindings);
    free(rebindings_head);
    return retval;
}

/// Rebinds symbols in every image, without the caller naming one.
/// Each call pushes its rebindings as a new node at the head of the
/// process-wide list (_rebindings_head).
/// Returns 0 on success, or the negative error code from prepend_rebindings().
int rebind_symbols(struct rebinding rebindings[], size_t rebindings_nel) {
  const int status = prepend_rebindings(&_rebindings_head, rebindings, rebindings_nel);
  if (status < 0) {
    return status;
  }

  // A head node without a successor means this is the first registration.
  const bool first_registration = (_rebindings_head->next == NULL);
  if (first_registration) {
    // Register the callback for image additions exactly once; it is also
    // invoked for the images that already exist.
    _dyld_register_func_for_add_image(_rebind_symbols_for_image);
    return status;
  }

  // The callback is already registered: just run over the existing images.
  const uint32_t image_count = _dyld_image_count();
  uint32_t idx = 0;
  while (idx < image_count) {
    const struct mach_header *image_header = _dyld_get_image_header(idx);
    intptr_t image_slide = _dyld_get_image_vmaddr_slide(idx);
    _rebind_symbols_for_image(image_header, image_slide);
    idx++;
  }
  return status;
}

总结

  • rebind_symbols

    • rebindings数组添加到链表

    • 根据链表判断是否第一次调用,这么做的目的是保证注册方法只会调用一次。两种情况都是为了回调_rebind_symbols_for_image

      • 第一次

        • 利用_dyld_register_func_for_add_image注册监听方法:_rebind_symbols_for_image

      • 不是第一次

        • 循环遍历已经加载的image,进行 _rebind_symbols_for_image 回调

  • _rebind_symbols_for_image

    • 第一步

      • 拿到三张表在内存中的地址

        • 符号表地址:symtab

        • 字符串表地址:strtab

        • 动态(间接)符号表地址:indirect_symtab

    • 第二步

      • 找懒加载和非懒加载表

    • 第三步

      • 调用perform_rebinding_with_section

  • perform_rebinding_with_section

    • 1.得到indirect_symbol_bindings

    • 2.遍历间接符号表,找到符号

    • 3.判断是否是需要hook的

    • 4.保存原函数指针,然后替换懒加载/非懒加载符号指针表中的函数地址

  • 完成Hook。

Last updated