# 11.fishhook原理

## 参考：

[GitHub-Fishhook](https://github.com/facebook/fishhook)

## 为源码添加注释

源码代码量不大，建议直接对照源码查看。

```cpp
// Copyright (c) 2013, Facebook, Inc.
// All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//   * Redistributions of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//   * Redistributions in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//   * Neither the name Facebook nor the names of its contributors may be used to
//     endorse or promote products derived from this software without specific
//     prior written permission.
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#include "fishhook.h"

#include <dlfcn.h>
#include <stdbool.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/types.h>
#include <mach/mach.h>
#include <mach/vm_map.h>
#include <mach/vm_region.h>
#include <mach-o/dyld.h>
#include <mach-o/loader.h>
#include <mach-o/nlist.h>

// Pick the 64-bit or 32-bit Mach-O structure definitions once, so the rest of
// the file can use a single set of names (mach_header_t, segment_command_t,
// section_t, nlist_t) regardless of the target's pointer width.
#ifdef __LP64__
typedef struct mach_header_64 mach_header_t;
typedef struct segment_command_64 segment_command_t;
typedef struct section_64 section_t;
typedef struct nlist_64 nlist_t;
// Load-command id of a segment command for this architecture.
#define LC_SEGMENT_ARCH_DEPENDENT LC_SEGMENT_64
#else
typedef struct mach_header mach_header_t;
typedef struct segment_command segment_command_t;
typedef struct section section_t;
typedef struct nlist nlist_t;
// Load-command id of a segment command for this architecture.
#define LC_SEGMENT_ARCH_DEPENDENT LC_SEGMENT
#endif

// Fallback definition of the __DATA_CONST segment name, used only when the
// included headers do not already define SEG_DATA_CONST.
#ifndef SEG_DATA_CONST
#define SEG_DATA_CONST  "__DATA_CONST"
#endif

// One node of a singly linked list of rebinding batches. Each node is created
// by prepend_rebindings() and holds one batch of requested rebindings.
struct rebindings_entry {
  // Heap-allocated copy of the rebinding array passed by the caller.
  struct rebinding *rebindings;
  // Number of elements in `rebindings`.
  size_t rebindings_nel;
  // Node that was the list head before this one was prepended (NULL for the first node).
  struct rebindings_entry *next;
};

// Head of the file-wide rebinding list; rebind_symbols() prepends to it and
// the dyld add-image callback reads it.
static struct rebindings_entry *_rebindings_head;

/// Copies `rebindings` into a newly allocated list node and pushes that node
/// onto the front of the list.
/// @param rebindings_head In/out pointer to the list head; only updated on success.
/// @param rebindings Array of rebindings to copy; the array itself is not retained.
/// @param nel Number of elements in `rebindings`.
/// @return 0 on success, -1 if an allocation fails (the list is left unchanged).
static int prepend_rebindings(struct rebindings_entry **rebindings_head,
                              struct rebinding rebindings[],
                              size_t nel) {
  struct rebindings_entry *new_entry = (struct rebindings_entry *) malloc(sizeof(*new_entry));
  if (!new_entry) {
    return -1;
  }
  // calloc fails cleanly when nel * sizeof(struct rebinding) would overflow
  // size_t, whereas a hand-written multiplication passed to malloc would wrap
  // and under-allocate the buffer that memcpy fills below.
  new_entry->rebindings = (struct rebinding *) calloc(nel, sizeof(struct rebinding));
  if (!new_entry->rebindings) {
    free(new_entry);
    return -1;
  }
  // The product cannot overflow here: calloc already accepted the same count and size.
  memcpy(new_entry->rebindings, rebindings, sizeof(struct rebinding) * nel);
  new_entry->rebindings_nel = nel;
  new_entry->next = *rebindings_head;
  *rebindings_head = new_entry;
  return 0;
}

/// Asks the kernel (vm_region / vm_region_64) for the protection bits of the
/// VM region reported for `sectionStart` in the current task.
/// @param sectionStart Address to look up.
/// @return The protection reported by the kernel, or VM_PROT_READ when the
///         query does not succeed.
static vm_prot_t get_protection(void *sectionStart) {
  vm_address_t region_address = (vm_address_t)sectionStart;
  vm_size_t region_size = 0;
  memory_object_name_t object_name;
  mach_port_t self_task = mach_task_self();
#if __LP64__
  vm_region_basic_info_data_64_t info;
  mach_msg_type_number_t info_count = VM_REGION_BASIC_INFO_COUNT_64;
  kern_return_t kr = vm_region_64(self_task,
                                  &region_address,
                                  &region_size,
                                  VM_REGION_BASIC_INFO_64,
                                  (vm_region_info_64_t)&info,
                                  &info_count,
                                  &object_name);
#else
  vm_region_basic_info_data_t info;
  mach_msg_type_number_t info_count = VM_REGION_BASIC_INFO_COUNT;
  kern_return_t kr = vm_region(self_task,
                               &region_address,
                               &region_size,
                               VM_REGION_BASIC_INFO,
                               (vm_region_info_t)&info,
                               &info_count,
                               &object_name);
#endif
  // Fall back to read-only when the lookup fails.
  return (kr == KERN_SUCCESS) ? info.protection : VM_PROT_READ;
}


/// Final step: patches the symbol-pointer slots of a single section.
/// For every slot whose symbol name (skipping its first character) equals the
/// name of a requested rebinding, the current slot value is saved through
/// `replaced` (when non-NULL) and the slot is overwritten with `replacement`.
/// @param rebindings Head of the rebinding list to match symbols against.
/// @param section Symbol-pointer section whose slots are patched.
/// @param slide ASLR slide of the image that owns `section`.
/// @param symtab Symbol table of the image.
/// @param strtab String table of the image.
/// @param indirect_symtab Indirect symbol table of the image.
static void perform_rebinding_with_section(struct rebindings_entry *rebindings,
                                           section_t *section,
                                           intptr_t slide,
                                           nlist_t *symtab,
                                           char *strtab,
                                           uint32_t *indirect_symtab) {
  const bool isDataConst = strcmp(section->segname, SEG_DATA_CONST) == 0;

  // section->reserved1 is the index in the indirect symbol table at which
  // this section's entries start.
  uint32_t *indirect_symbol_indices = indirect_symtab + section->reserved1;

  // slide + section->addr is the in-memory array of pointers stored in this
  // section; slot i corresponds to indirect_symbol_indices[i].
  void **indirect_symbol_bindings = (void **)((uintptr_t)slide + section->addr);
  vm_prot_t oldProtection = VM_PROT_READ;
  if (isDataConst) {
    // Query the protection of the section that is about to be patched. The
    // previous code passed `rebindings` (the heap-allocated list), so the
    // restore step at the end applied the list's protection to the section.
    oldProtection = get_protection(indirect_symbol_bindings);
    mprotect(indirect_symbol_bindings, section->size, PROT_READ | PROT_WRITE);
  }
  // Walk every pointer slot in the section.
  for (uint i = 0; i < section->size / sizeof(void *); i++) {
    // Index of this slot's symbol in the symbol table, read from the
    // indirect symbol table.
    uint32_t symtab_index = indirect_symbol_indices[i];
    // Skip the special absolute/local marker values: they are not indices.
    if (symtab_index == INDIRECT_SYMBOL_ABS || symtab_index == INDIRECT_SYMBOL_LOCAL ||
        symtab_index == (INDIRECT_SYMBOL_LOCAL   | INDIRECT_SYMBOL_ABS)) {
      continue;
    }
    // The symbol table entry stores the offset of the name in the string table.
    uint32_t strtab_offset = symtab[symtab_index].n_un.n_strx;
    char *symbol_name = strtab + strtab_offset;
    // The comparison below skips symbol_name[0] (the leading '_' of the
    // symbol), so the name must contain at least two characters.
    bool symbol_name_longer_than_1 = symbol_name[0] && symbol_name[1];
    // Search every batch in the list for a rebinding with this name.
    struct rebindings_entry *cur = rebindings;
    while (cur) {
      for (uint j = 0; j < cur->rebindings_nel; j++) {
        if (symbol_name_longer_than_1 &&
            strcmp(&symbol_name[1], cur->rebindings[j].name) == 0) {
          // Save the current target only when the caller asked for it and the
          // slot has not already been pointed at the replacement.
          if (cur->rebindings[j].replaced != NULL &&
              indirect_symbol_bindings[i] != cur->rebindings[j].replacement) {
            *(cur->rebindings[j].replaced) = indirect_symbol_bindings[i];
          }
          // Redirect the slot to the replacement function.
          indirect_symbol_bindings[i] = cur->rebindings[j].replacement;
          // First match wins: move on to the next slot.
          goto symbol_loop;
        }
      }
      cur = cur->next;
    }
  symbol_loop:;
  }
  if (isDataConst) {
    // Translate the saved VM_PROT_* bits to PROT_* bits and restore them.
    int protection = 0;
    if (oldProtection & VM_PROT_READ) {
      protection |= PROT_READ;
    }
    if (oldProtection & VM_PROT_WRITE) {
      protection |= PROT_WRITE;
    }
    if (oldProtection & VM_PROT_EXECUTE) {
      protection |= PROT_EXEC;
    }
    mprotect(indirect_symbol_bindings, section->size, protection);
  }
}


/// 绑定的核心函数
/// @param rebindings 链表头
/// @param header image的头
/// @param slide ASLR
static void rebind_symbols_for_image(struct rebindings_entry *rebindings,
                                     const struct mach_header *header,
                                     intptr_t slide) {
    /**
     dladdr() 这个函数就是在程序里面找header

     可确定指定的address，是否位于构成进程的地址空间的其中一个加载模块（可执行文件或共享库）内。

     如果某个地址，位于在其上面映射加载模块的基址，和为该加载模块映射的最高虚拟地址之间（包括两端），则认为改地址在加载模块的范围内。

     如果某个加载模块复合这个条件，则会搜索其动态符号表，以查找指定的address最接近的符号。
     最接近符号是指其值等于，或最为接近但小于指定address的符号

     如果指定的address不再其中一个模块的加载范围内的话，返回0，且不修改Dl_info结构的内容。否则将返回一个非零值，同时设置Dl_info结构的字段。

     如果在包含address的加载模块内，找不到其值小于或等于address的符号，则dlisname、dli_saddr和dli_size字段将设置为0，dli_bind字段设置为STB_LOCAL，dli_typs字段设置为STT_NOTYPE。

     */
  Dl_info info;
  if (dladdr(header, &info) == 0) {
    return;
  }
// 准备从macho里面去找
  segment_command_t *cur_seg_cmd;
  segment_command_t *linkedit_segment = NULL;
  struct symtab_command* symtab_cmd = NULL;
  struct dysymtab_command* dysymtab_cmd = NULL;
// 跳过header的大小，找到loadCommand
  uintptr_t cur = (uintptr_t)header + sizeof(mach_header_t);
  for (uint i = 0; i < header->ncmds; i++, cur += cur_seg_cmd->cmdsize) {
    cur_seg_cmd = (segment_command_t *)cur;
    if (cur_seg_cmd->cmd == LC_SEGMENT_ARCH_DEPENDENT) {
      if (strcmp(cur_seg_cmd->segname, SEG_LINKEDIT) == 0) {
        linkedit_segment = cur_seg_cmd;
      }
    } else if (cur_seg_cmd->cmd == LC_SYMTAB) {
      symtab_cmd = (struct symtab_command*)cur_seg_cmd;
    } else if (cur_seg_cmd->cmd == LC_DYSYMTAB) {
      dysymtab_cmd = (struct dysymtab_command*)cur_seg_cmd;
    }
  }
// 如果刚才获取的，有一项为空就直接返回
  if (!symtab_cmd || !dysymtab_cmd || !linkedit_segment ||
      !dysymtab_cmd->nindirectsyms) {
    return;
  }

    // 链接时程序的基址 = __LINKEDEDIT.VM_Address - __LINKEDIT.File_Offset + silde
  // Find base symbol/string table addresses
  uintptr_t linkedit_base = (uintptr_t)slide + linkedit_segment->vmaddr - linkedit_segment->fileoff;
  nlist_t *symtab = (nlist_t *)(linkedit_base + symtab_cmd->symoff);
  char *strtab = (char *)(linkedit_base + symtab_cmd->stroff);

    // 间接（动态）符号表的地址 = 基址 + 符号表偏移量
  // Get indirect symbol table (array of uint32_t indices into symbol table)
  uint32_t *indirect_symtab = (uint32_t *)(linkedit_base + dysymtab_cmd->indirectsymoff);

  cur = (uintptr_t)header + sizeof(mach_header_t);
  for (uint i = 0; i < header->ncmds; i++, cur += cur_seg_cmd->cmdsize) {
    cur_seg_cmd = (segment_command_t *)cur;
    if (cur_seg_cmd->cmd == LC_SEGMENT_ARCH_DEPENDENT) {
        // 找到Data段
      if (strcmp(cur_seg_cmd->segname, SEG_DATA) != 0 &&
          strcmp(cur_seg_cmd->segname, SEG_DATA_CONST) != 0) {
        continue;
      }
      for (uint j = 0; j < cur_seg_cmd->nsects; j++) {
        section_t *sect =
          (section_t *)(cur + sizeof(segment_command_t)) + j;
          // 找懒加载表
        if ((sect->flags & SECTION_TYPE) == S_LAZY_SYMBOL_POINTERS) {
          perform_rebinding_with_section(rebindings, sect, slide, symtab, strtab, indirect_symtab);
        }
          // 非懒加载表
        if ((sect->flags & SECTION_TYPE) == S_NON_LAZY_SYMBOL_POINTERS) {
          perform_rebinding_with_section(rebindings, sect, slide, symtab, strtab, indirect_symtab);
        }
      }
    }
  }
}

/// Adapter with the (header, slide) signature that is registered as the dyld
/// add-image callback; it applies the file-wide rebinding list to the image.
/// @param header Mach-O header of the image.
/// @param slide ASLR slide of the image.
static void _rebind_symbols_for_image(const struct mach_header *header, intptr_t slide) {
  rebind_symbols_for_image(_rebindings_head, header, slide);
}

/// Applies the given rebindings to one specific image only.
/// Unlike rebind_symbols(), nothing is added to the file-wide list and no
/// other image is visited; the temporary list node is freed before returning.
/// @param header Mach-O header of the image to patch.
/// @param slide ASLR slide of that image.
/// @param rebindings Array of rebindings to apply.
/// @param rebindings_nel Number of elements in `rebindings`.
/// @return 0 on success, -1 if the temporary list node could not be allocated.
int rebind_symbols_image(void *header,
                         intptr_t slide,
                         struct rebinding rebindings[],
                         size_t rebindings_nel) {
  struct rebindings_entry *rebindings_head = NULL;
  int retval = prepend_rebindings(&rebindings_head, rebindings, rebindings_nel);
  if (retval < 0) {
    // Allocation failed, so the list is empty. Return now instead of walking
    // the image with nothing to rebind (which would still toggle the page
    // protection of its __DATA_CONST sections).
    return retval;
  }
  // Only the requested image is processed; there is no loop over all images.
  rebind_symbols_for_image(rebindings_head, (const struct mach_header *) header, slide);
  free(rebindings_head->rebindings);
  free(rebindings_head);
  return retval;
}

/// Applies the given rebindings to every image, without naming a specific one.
/// Each call stores its rebindings as a new node at the front of the
/// file-wide list (`_rebindings_head`).
/// @param rebindings Array of rebindings to apply.
/// @param rebindings_nel Number of elements in `rebindings`.
/// @return 0 on success, or the negative value returned by
///         prepend_rebindings() when the node could not be allocated.
int rebind_symbols(struct rebinding rebindings[], size_t rebindings_nel) {
  const int status = prepend_rebindings(&_rebindings_head, rebindings, rebindings_nel);
  if (status < 0) {
    return status;
  }

  // A head node without a successor means this is the first call.
  const bool first_call = (_rebindings_head->next == NULL);
  if (first_call) {
    // Register the add-image callback once. dyld invokes it for the images
    // that are already loaded as well as for images loaded later.
    _dyld_register_func_for_add_image(_rebind_symbols_for_image);
    return status;
  }

  // The callback is already registered: apply the list to every image that
  // is currently loaded.
  const uint32_t image_count = _dyld_image_count();
  for (uint32_t image = 0; image < image_count; ++image) {
    _rebind_symbols_for_image(_dyld_get_image_header(image),
                              _dyld_get_image_vmaddr_slide(image));
  }
  return status;
}
```

## 总结

* rebind\_symbols
  * rebindings数组添加到链表
  * 根据链表判断是否第一次调用，这么做的目的是保证注册方法只会调用一次。两种情况都是为了回调`_rebind_symbols_for_image`
    * 第一次
      * 利用`_dyld_register_func_for_add_image`注册监听方法：\_rebind\_symbols\_for\_image
    * 不是第一次
      * 循环遍历已经加载的image，进行 `_rebind_symbols_for_image` 回调
* \_rebind\_symbols\_for\_image
  * 第一步
    * 拿到三张表在内存中的地址
      * 符号表地址：`symtab`
      * 字符串表地址：`strtab`
      * 动态（间接）符号表地址：`indirect_symtab`
  * 第二步
    * 找懒加载和非懒加载表
  * 第三步
    * 调用`perform_rebinding_with_section`
* `perform_rebinding_with_section`
  * 1.得到`indirect_symbol_bindings`
  * 2.遍历间接符号表，找到符号
  * 3.判断是否是需要hook的
  * 4.保存原函数指针，然后将懒加载/非懒加载符号指针表中对应的函数地址替换为自定义函数地址
* 完成Hook。


---

# Agent Instructions: Querying This Documentation

If you need additional information that is not directly available in this page, you can query the documentation dynamically by asking a question.

Perform an HTTP GET request on the current page URL with the `ask` query parameter:

```
GET https://ryukiedev.gitbook.io/wiki/ni-xiang/11.fishhook-yuan-li.md?ask=<question>
```

The question should be specific, self-contained, and written in natural language.
The response will contain a direct answer to the question and relevant excerpts and sources from the documentation.

Use this mechanism when the answer is not explicitly present in the current page, you need clarification or additional context, or you want to retrieve related documentation sections.
