afl-fuzz源码分析中篇

发表于： 2024-09-01 分类于： AEG/Fuzz

字数： 8420 阅读：≈ 17分钟浏览：

[TOC]

前言

该部分内容是fuzz大循环前的一小部分内容，具体内容如下

// dry run
  perform_dry_run(use_argv);
  // 精简队列，这个等到下篇再说
  cull_queue();
  //ui部分
  show_init_stats();

  // 若是恢复之前的 fuzz，则找到该从队列的什么位置继续
  seek_to = find_start_position();
  // 更新 fuzzer_stats 文件
  write_stats_file(0, 0, 0);
  // 保存 auto extras
  save_auto();
  if (stop_soon) goto stop_fuzzing;

  /* Woop woop woop */

  if (!not_on_tty) {
    sleep(4);
    start_time += 4000;
    if (stop_soon) goto stop_fuzzing;
  }

个人认为很重要的部分如下

perform_dry_run
calibrate_case 重要之重要，校准函数，对每个testcase会跑多遍，必看
init_forkserver 初始化fork server函数，很重要
run_target  重要之重要，也和fork server有关，也是经常用到，必看
classify_counts 给trace_bits分桶
update_bitmap_score 和top_rated更新有关

perform_dry_run

对于queue 中的每一个用例，调用 calibrate_case 函数进行校准，根据返回值res进行相应的处理
perform_dry_run只会跑一次，可以把它当作fuzz之前的一个校准所有testcase的步骤
主要流程如下

上述代码对于 queue 中的每一个用例，调用 calibrate_case 函数进行校准。用例会被运行多次（默认是 8 次，这个函数的具体细节我们下文讨论）。对于校准结果：

若 timeout 了，且 -t 参数里面没有容忍超时、也不处于 resume 模式，则直接退出。
若 crash 了，则直接退出（除非有 AFL_SKIP_CRASHES 环境变量）。
若无法执行目标程序，或目标程序没被插桩，则直接退出。

/* Perform dry run of all test cases to confirm that the app is working as
   expected. This is done only for the initial inputs, and only once. */

static void perform_dry_run(char** argv) {

  struct queue_entry* q = queue;
  u32 cal_failures = 0;
  u8* skip_crashes = getenv("AFL_SKIP_CRASHES");
  //遍历每一个testcase
  while (q) {

    u8* use_mem;
    u8  res;
    s32 fd;

    u8* fn = strrchr(q->fname, '/') + 1;

    ACTF("Attempting dry run with '%s'...", fn);

    fd = open(q->fname, O_RDONLY);
    if (fd < 0) PFATAL("Unable to open '%s'", q->fname);

    use_mem = ck_alloc_nozero(q->len);

    //将testcase内容读进use_mem
    if (read(fd, use_mem, q->len) != q->len)
      FATAL("Short read from '%s'", q->fname);

    close(fd);

    //校验这个testcase并获取返回值存到res
    res = calibrate_case(argv, q, use_mem, 0, 1);
    ck_free(use_mem);

    if (stop_soon) return;

    if (res == crash_mode || res == FAULT_NOBITS)
      SAYF(cGRA "    len = %u, map size = %u, exec speed = %llu us\n" cRST, 
           q->len, q->bitmap_size, q->exec_us);

    switch (res) {

      case FAULT_NONE:
        //queue是队列的第一个位置的指针，也就是第一个testcase
        // 若 shm 中有超过 100 个 id 被命中过，这其中却没有 >= 32768 的，说明 id 分布非常不均匀
        // 此时提醒用户用最新的 AFL 重新编译目标程序
        if (q == queue) check_map_coverage();

        // crash exploration 模式，但是却没 crash
        if (crash_mode) FATAL("Test case '%s' does *NOT* crash", fn);

        break;

      case FAULT_TMOUT:

        if (timeout_given) {

          /* The -t nn+ syntax in the command line sets timeout_given to '2' and
             instructs afl-fuzz to tolerate but skip queue entries that time
             out. */
          // 若 argv 中，时间限制参数是 -t nn+，或 resume 模式，则容忍超时
          if (timeout_given > 1) {
            WARNF("Test case results in a timeout (skipping)");
            q->cal_failed = CAL_CHANCES;
            cal_failures++;
            break;
          }

          SAYF("\n" cLRD "[-] " cRST
               "The program took more than %u ms to process one of the initial test cases.\n"
               "    Usually, the right thing to do is to relax the -t option - or to delete it\n"
               "    altogether and allow the fuzzer to auto-calibrate. That said, if you know\n"
               "    what you are doing and want to simply skip the unruly test cases, append\n"
               "    '+' at the end of the value passed to -t ('-t %u+').\n", exec_tmout,
               exec_tmout);

          FATAL("Test case '%s' results in a timeout", fn);

        } else {

          SAYF("\n" cLRD "[-] " cRST
               "The program took more than %u ms to process one of the initial test cases.\n"
               "    This is bad news; raising the limit with the -t option is possible, but\n"
               "    will probably make the fuzzing process extremely slow.\n\n"

               "    If this test case is just a fluke, the other option is to just avoid it\n"
               "    altogether, and find one that is less of a CPU hog.\n", exec_tmout);

          FATAL("Test case '%s' results in a timeout", fn);

        }

      //发生了crash
      case FAULT_CRASH:  

        if (crash_mode) break;

        if (skip_crashes) {
          WARNF("Test case results in a crash (skipping)");
          q->cal_failed = CAL_CHANCES;
          cal_failures++;
          break;
        }

        if (mem_limit) {

          SAYF("\n" cLRD "[-] " cRST
               "Oops, the program crashed with one of the test cases provided. There are\n"
               "    several possible explanations:\n\n"

               "    - The test case causes known crashes under normal working conditions. If\n"
               "      so, please remove it. The fuzzer should be seeded with interesting\n"
               "      inputs - but not ones that cause an outright crash.\n\n"

               "    - The current memory limit (%s) is too low for this program, causing\n"
               "      it to die due to OOM when parsing valid files. To fix this, try\n"
               "      bumping it up with the -m setting in the command line. If in doubt,\n"
               "      try something along the lines of:\n\n"

#ifdef RLIMIT_AS
               "      ( ulimit -Sv $[%llu << 10]; /path/to/binary [...] <testcase )\n\n"
#else
               "      ( ulimit -Sd $[%llu << 10]; /path/to/binary [...] <testcase )\n\n"
#endif /* ^RLIMIT_AS */

               "      Tip: you can use http://jwilk.net/software/recidivm to quickly\n"
               "      estimate the required amount of virtual memory for the binary. Also,\n"
               "      if you are using ASAN, see %s/notes_for_asan.txt.\n\n"

#ifdef __APPLE__
  
               "    - On MacOS X, the semantics of fork() syscalls are non-standard and may\n"
               "      break afl-fuzz performance optimizations when running platform-specific\n"
               "      binaries. To fix this, set AFL_NO_FORKSRV=1 in the environment.\n\n"

#endif /* __APPLE__ */

               "    - Least likely, there is a horrible bug in the fuzzer. If other options\n"
               "      fail, poke <lcamtuf@coredump.cx> for troubleshooting tips.\n",
               DMS(mem_limit << 20), mem_limit - 1, doc_path);

        } else {

          SAYF("\n" cLRD "[-] " cRST
               "Oops, the program crashed with one of the test cases provided. There are\n"
               "    several possible explanations:\n\n"

               "    - The test case causes known crashes under normal working conditions. If\n"
               "      so, please remove it. The fuzzer should be seeded with interesting\n"
               "      inputs - but not ones that cause an outright crash.\n\n"

#ifdef __APPLE__
  
               "    - On MacOS X, the semantics of fork() syscalls are non-standard and may\n"
               "      break afl-fuzz performance optimizations when running platform-specific\n"
               "      binaries. To fix this, set AFL_NO_FORKSRV=1 in the environment.\n\n"

#endif /* __APPLE__ */

               "    - Least likely, there is a horrible bug in the fuzzer. If other options\n"
               "      fail, poke <lcamtuf@coredump.cx> for troubleshooting tips.\n");

        }

        FATAL("Test case '%s' results in a crash", fn);

      //检测到无法执行文件
      case FAULT_ERROR:

        FATAL("Unable to execute target application ('%s')", argv[0]);
      //检测到没有插桩
      case FAULT_NOINST:

        FATAL("No instrumentation detected");

      //没有新的发现
      case FAULT_NOBITS: 
        //Number of useless starting paths，最开始的testcase的queue中没有用的数量
        useless_at_start++;

        if (!in_bitmap && !shuffle_queue)
          WARNF("No new instrumentation output, test case may be useless.");

        break;

    }

    if (q->var_behavior) WARNF("Instrumentation output varies across runs.");

    q = q->next;

  }

  //比较校验失败的数量和总的testcase的数量
  if (cal_failures) {

    if (cal_failures == queued_paths)
      FATAL("All test cases time out%s, giving up!",
            skip_crashes ? " or crash" : "");

    WARNF("Skipped %u test cases (%0.02f%%) due to timeouts%s.", cal_failures,
          ((double)cal_failures) * 100 / queued_paths,
          skip_crashes ? " or crashes" : "");

    if (cal_failures * 5 > queued_paths)
      WARNF(cLRD "High percentage of rejected test cases, check settings!");

  }

  OKF("All test cases processed.");

}

calibrate_case

calibrate_case 函数的运行时机至少有两个：一是程序运行之初，用于校准初始 corpus；二是发现了新路径，将有趣的用例加入 queue 时。总之进了 queue 的用例，都要被运行一遍calibrate_case 函数
总体流程如下

可见，校准过程是多次运行用例（默认是 8 次），统计各次运行的结果。

若 fork server 没有准备好，就调用 init_forkserver() 初始化 fork server
多次调用 run_target() 运行目标程序，观察结果。若没有任何 hit count 命中，则认为程序未插桩，报告错误。
如果发现对某用例多次运行程序，其表现不一致，则将执行次数提升到 40 次，并更新 var_bytes[] （这个全局变量表示 shm 中哪些位置存在不一致性）。另外，将 queue entry 的 var_behavior 标记设为 1。
更新 queue entry 信息，例如将 exec_us 字段设为校准过程中的执行时间均值。
给这个用例打分，并更新 top_rated 指针数组。

res = calibrate_case(argv, q, use_mem, 0, 1);


static u8 calibrate_case(char** argv, struct queue_entry* q, u8* use_mem,
                         u32 handicap, u8 from_queue) {

  static u8 first_trace[MAP_SIZE];
//存储run_target结果  new_bits记录 have_new_bits函数更大的返回值                 是否有variable特性    have_new_bits
  u8  fault = 0,         new_bits = 0,                                           var_detected = 0,    hnb = 0,
      //是否是第一次跑
      first_run = (q->exec_cksum == 0);

  u64 start_us, stop_us;

  s32 old_sc = stage_cur, old_sm = stage_max;
  u32 use_tmout = exec_tmout;
  u8* old_sn = stage_name;

  /* Be a bit more generous about timeouts when resuming sessions, or when
     trying to calibrate already-added finds. This helps avoid trouble due
     to intermittent latency. */
  //若是 resume 先前的 fuzz 过程，或这个用例并非初始 corpus，则多容忍一点超时
  if (!from_queue || resuming_fuzz)
    use_tmout = MAX(exec_tmout + CAL_TMOUT_ADD,
                    exec_tmout * CAL_TMOUT_PERC / 100);

  q->cal_failed++;

  stage_name = "calibration";
  stage_max  = fast_cal ? 3 : CAL_CYCLES;

  /* Make sure the forkserver is up before we do anything, and let's not
     count its spin-up time toward binary calibration. */

  // 如果 fork server 没有准备好，就初始化 fork server
  if (dumb_mode != 1 && !no_forkserver && !forksrv_pid)
    init_forkserver(argv);

  if (q->exec_cksum) {
    // 若已经有过 checksum，则将 trace_bits 备份到局部静态变量 first_trace（因为 trace_bits 马上就要被改掉了）
    memcpy(first_trace, trace_bits, MAP_SIZE);
    hnb = has_new_bits(virgin_bits);
    if (hnb > new_bits) new_bits = hnb;

  }

  start_us = get_cur_time_us();

  // 多次执行用例
  for (stage_cur = 0; stage_cur < stage_max; stage_cur++) {

    u32 cksum;

    if (!first_run && !(stage_cur % stats_update_freq)) show_stats();
    // 把用例写入target文件
    write_to_testcase(use_mem, q->len);
    // 运行目标程序
    fault = run_target(argv, use_tmout);

    /* stop_soon is set by the handler for Ctrl+C. When it's pressed,
       we want to bail out quickly. */

    if (stop_soon || fault != crash_mode) goto abort_calibration;

    // shm 没有命中记录，说明没插桩
    if (!dumb_mode && !stage_cur && !count_bytes(trace_bits)) {
      fault = FAULT_NOINST;
      goto abort_calibration;
    }

    // 计算本次执行路径的消息摘要
    cksum = hash32(trace_bits, MAP_SIZE, HASH_CONST);

    // 有两种情况，要么这个用例从未执行过，要么这个用例多次执行的表现不一致
    if (q->exec_cksum != cksum) {
      // 更新 virgin_bits
      hnb = has_new_bits(virgin_bits);
      if (hnb > new_bits) new_bits = hnb;
      // q->exec_cksum不为0，则这个用例以前执行过，且这次执行与以前表现不一致
      if (q->exec_cksum) {

        u32 i;

        for (i = 0; i < MAP_SIZE; i++) {
          // 记录不一致的 hit count 位置，并把本用例的执行次数提升到 40 次
          if (!var_bytes[i] && first_trace[i] != trace_bits[i]) {

            var_bytes[i] = 1;
            stage_max    = CAL_CYCLES_LONG;

          }

        }

        var_detected = 1;

      } else {
        // 这是该用例首次执行，记录 checksum 到 q->exec_cksum 中 ，同时将trace_bits复制到first_trace中
        q->exec_cksum = cksum;
        memcpy(first_trace, trace_bits, MAP_SIZE);

      }

    }

  }

  stop_us = get_cur_time_us();

  total_cal_us     += stop_us - start_us;
  total_cal_cycles += stage_max;

  /* OK, let's collect some stats about the performance of this test case.
     This is used for fuzzing air time calculations in calculate_score(). */
  // 更新 queue entry 信息
  q->exec_us     = (stop_us - start_us) / stage_max;
  q->bitmap_size = count_bytes(trace_bits);
  q->handicap    = handicap;
  q->cal_failed  = 0;

  total_bitmap_size += q->bitmap_size;
  total_bitmap_entries++;

  // 给这个用例打分，并更新 top_rated 指针数组 
  update_bitmap_score(q);

  /* If this case didn't result in new output from the instrumentation, tell
     parent. This is a non-critical problem, but something to warn the user
     about. */

  // 如果这个用例没有产生新的本质不同的路径，则报告用户
  if (!dumb_mode && first_run && !fault && !new_bits) fault = FAULT_NOBITS;

abort_calibration:
  // 若发现了新的边，更新一些信息
  if (new_bits == 2 && !q->has_new_cov) {
    q->has_new_cov = 1;
    queued_with_cov++;
  }

  /* Mark variable paths. */

  if (var_detected) {
    // 若该用例多次运行的行为不一致，统计有多少个位置的 hit count 不一致
    var_byte_count = count_bytes(var_bytes);

    // 将这个 queue entry 标记为 var_behavior
    if (!q->var_behavior) {
      mark_as_variable(q);
      queued_variable++;
    }

  }

  // 退出 calibration 模式，恢复现场
  stage_name = old_sn;
  stage_cur  = old_sc;
  stage_max  = old_sm;
  
  //ui
  if (!first_run) show_stats();

  return fault;

}

init_forkserver

总体流程如图所示
父进程是afl-fuzz本身，子进程是被fuzz的target程序，也即是fork server
子进程的任务

可见，子进程做的事情是：

把内存大小限制为 50MB、把 core dump 大小上限设为 0。
创建一个新的进程组。
关掉不用的 fd。
若目标程序从 stdin 读入，则重定向到用例文件。
把 fd 198 重定向到接收 supervisor 发来消息的管道、把 fd 199 重定向到往 supervisor 回复信息的管道。
如果用户没有给出 ASan 和 MSan 的选项，则指定为默认值。
执行目标程序。目标程序会在第一个入口点停下来，往 fd 199 写四个字节内容。由于发送成功，目标程序会担任 fork server，等待 supervisor 发来的指令。

总之：子进程收拾一下现场，维护通讯管道，然后把自己 execv 成 fork server

父进程的任务

父进程会等待 fork server 发来 4字节内容，若接收成功，则 fork server 成功启动，初始化完成；否则做一点错误处理

EXP_ST void init_forkserver(char** argv) {

  static struct itimerval it;
  int st_pipe[2], ctl_pipe[2];
  int status;
  s32 rlen;

  ACTF("Spinning up the fork server...");
  // 写入 st_pipe[1] 的内容可以在 st_pipe[0] 读到。这个管道用于 fd 199，是 fork server -> supervisor 方向
  // 写入 ctl_pipe[1] 的内容可以在 ctl_pipe[0] 读到。这个管道用于 fd 198，是 supervisor -> fork server 方向
  if (pipe(st_pipe) || pipe(ctl_pipe)) PFATAL("pipe() failed");

  forksrv_pid = fork();

  if (forksrv_pid < 0) PFATAL("fork() failed");


  //******************************************子进程******************************************
  if (!forksrv_pid) {

    struct rlimit r;

    /* Umpf. On OpenBSD, the default fd limit for root users is set to
       soft 128. Let's try to fix that... */

    if (!getrlimit(RLIMIT_NOFILE, &r) && r.rlim_cur < FORKSRV_FD + 2) {

      r.rlim_cur = FORKSRV_FD + 2;
      setrlimit(RLIMIT_NOFILE, &r); /* Ignore errors */

    }

    if (mem_limit) {

      r.rlim_max = r.rlim_cur = ((rlim_t)mem_limit) << 20;
      setrlimit(RLIMIT_DATA, &r); /* Ignore errors */
    }

    /* Dumping cores is slow and can lead to anomalies if SIGKILL is delivered
       before the dump is complete. */

    r.rlim_max = r.rlim_cur = 0;

    setrlimit(RLIMIT_CORE, &r); /* Ignore errors */

    /* Isolate the process and configure standard descriptors. If out_file is
       specified, stdin is /dev/null; otherwise, out_fd is cloned instead. */

    setsid();

    dup2(dev_null_fd, 1);
    dup2(dev_null_fd, 2);

    if (out_file) {

      dup2(dev_null_fd, 0);

    } else {

      dup2(out_fd, 0);
      close(out_fd);

    }

    /* Set up control and status pipes, close the unneeded original fds. */
    // fork server 写 st_pipe[1]、读 ctl_pipe[0]
    // 将 fd 198 重定向到 ctl_pipe[0] 以接收 supervisor 指令
    // 将 fd 199 重定向到 st_pipe[1] 以向 supervisor 发送回复
    if (dup2(ctl_pipe[0], FORKSRV_FD) < 0) PFATAL("dup2() failed");
    if (dup2(st_pipe[1], FORKSRV_FD + 1) < 0) PFATAL("dup2() failed");

    close(ctl_pipe[0]);
    close(ctl_pipe[1]);
    close(st_pipe[0]);
    close(st_pipe[1]);

    close(out_dir_fd);
    close(dev_null_fd);
    close(dev_urandom_fd);
    close(fileno(plot_file));

    //处理环境变量，此处略
    /*
    ...
    */

    // 执行目标程序。它会在第一个入口点停下来，给 supervisor 发送 4 字节内容，并等待指令
    execv(target_path, argv);

    /* Use a distinctive bitmap signature to tell the parent about execv()
       falling through. */

    *(u32*)trace_bits = EXEC_FAIL_SIG;
    exit(0);

  }

  //*****************************************父进程******************************************
  /* Close the unneeded endpoints. */

  // supervisor 读 st_pipe[0]、写 ctl_pipe[1]
  close(ctl_pipe[0]);
  close(st_pipe[1]);

  fsrv_ctl_fd = ctl_pipe[1];
  fsrv_st_fd  = st_pipe[0];

  /* Wait for the fork server to come up, but don't wait too long. */
  // 设置定时器，等待 fork server 发来 4 字节
  it.it_value.tv_sec = ((exec_tmout * FORK_WAIT_MULT) / 1000);
  it.it_value.tv_usec = ((exec_tmout * FORK_WAIT_MULT) % 1000) * 1000;

  setitimer(ITIMER_REAL, &it, NULL);
  // 注意 read 是阻塞的，会等到 fork server 发来 4字节 或者超时为止
  rlen = read(fsrv_st_fd, &status, 4);

  it.it_value.tv_sec = 0;
  it.it_value.tv_usec = 0;

  setitimer(ITIMER_REAL, &it, NULL);

  /* If we have a four-byte "hello" message from the server, we're all set.
     Otherwise, try to figure out what went wrong. */
     
  // 读到 4 个字节，说明 fork server 成功启动
  if (rlen == 4) {
    OKF("All right - fork server is up.");
    return;
  }
  //以下为错误处理，略
  /*
  ...
  */

}

setsid函数

setsid() 是一个用于创建新会话和进程组的系统调用函数。它主要用于以下目的：

创建新会话： 调用setsid()的进程成为一个新会话的会话首领。新会话与调用进程原来所属的会话完全独立。

创建新进程组： 调用setsid()的进程成为新进程组的组长。新进程组的ID与调用setsid()的进程的进程ID（PID）相同。

脱离控制终端： 如果调用setsid()的进程原来有控制终端，调用setsid()之后，该进程会与原来的控制终端分离，并且新会话不会分配控制终端。此功能常用于后台进程（如守护进程）的创建，以确保它们不再依赖任何终端，并且不会意外地收到来自终端的信号（例如SIGHUP）。

返回值：

成功时，setsid()返回新会话ID（即调用进程的进程ID）。如果调用进程已经是一个进程组的组长，则setsid()调用失败，并返回-1，errno被设置为EPERM。常见应用：

setsid()经常用于启动守护进程（Daemon），确保进程独立于终端，并且不会意外地受到终端的控制信号干扰。

在 AFL 中，setsid() 的使用有以下几个目的(由GPT生成)：

隔离 fork server 进程：通过调用 setsid()，fork server 进程被隔离在一个新的会话和进程组中，这样可以避免受到原有终端和其他进程组的干扰，确保它独立运行。

防止信号干扰：创建新的会话和进程组后，fork server 进程不会受到来自父进程组的信号（如终端信号或 Ctrl+C），从而提高其稳定性。

避免意外退出：由于 fork server 进程与终端的控制脱钩，它在终端关闭时不会因为 SIGHUP 信号而意外退出。

有关st_pipe[2], ctl_pipe[2]理解

pipe() 函数用于创建一个无名管道，并返回两个文件描述符，分别指向管道的读端（read end）和写端（write end）。st_pipe[0] 和 ctl_pipe[0] 指向管道的读端，st_pipe[1] 和 ctl_pipe[1] 指向管道的写端

pipe(st_pipe) || pipe(ctl_pipe)

st_pipe对应199,ctl_pipe对应198
可以这样理解，st_pipe就是state_pipe，所以对于supervisor，它只用从st_pipe读取数据获取fork server状态即可，因此引用st_pipe[0]。而对于fork server要向st_pipe写数据，因此要用st_pipe[1]
同时ctl_pipe就是control_pipe，所以对于supervisor，它要向fork server传达命令，因此要用ctl_pipe[1]。而对于fork server要获取命令，因为要用ctl_pipe[0]

有关子进程execv(target_path, argv)这个函数的理解

在afl-as源码中分析了main payload的内容，有如下部分,这就是fork server要做的事

if ( write(199, &_afl_temp, 4uLL) == 4 )
      {
        while ( 1 )
        {
          v12 = 198;
          if ( read(198, &_afl_temp, 4uLL) != 4 )
            break;
          LODWORD(v13) = fork();
          if ( v13 < 0 )
            break;
          if ( !v13 )
            goto __afl_fork_resume;
          _afl_fork_pid = v13;
          write(199, &_afl_fork_pid, 4uLL);
          v12 = _afl_fork_pid;
          LODWORD(v14) = waitpid(_afl_fork_pid, &_afl_temp, 0);
          if ( v14 <= 0 )
            break;
          write(199, &_afl_temp, 4uLL);
        }
        _exit(v12);
      }

向fd=199写入4字节，等待supervisor传达过来的指令

has_new_bits

更新 virgin_bits。返回值：1 = 仅 hit count 更新；2 = 出现了新的边

/* Check if the current execution path brings anything new to the table.
   Update virgin bits to reflect the finds. Returns 1 if the only change is
   the hit-count for a particular tuple; 2 if there are new tuples seen. 
   Updates the map, so subsequent calls will always return 0.

   This function is called after every exec() on a fairly large buffer, so
   it needs to be fast. We do this in 32-bit and 64-bit flavors. */

static inline u8 has_new_bits(u8* virgin_map) {

  u32* current = (u32*)trace_bits;
  u32* virgin  = (u32*)virgin_map;
  u32  i = (MAP_SIZE >> 2);

  u8   ret = 0;

  while (i--) {

    /* Optimize for (*current & *virgin) == 0 - i.e., no bits in current bitmap
       that have not been already cleared from the virgin map - since this will
       almost always be the case. */

    if (unlikely(*current) && unlikely(*current & *virgin)) {

      if (likely(ret < 2)) {

        u8* cur = (u8*)current;
        u8* vir = (u8*)virgin;

        /* Looks like we have not found any new bytes yet; see if any non-zero
           bytes in current[] are pristine in virgin[]. */

        //只有virgin为\xff才表示这个路径未被探索过，只要这个路径被探索过一次，就说明virgin已经不为\xff
        //此时就让ret=1，表示只是hit count增加
        if ((cur[0] && vir[0] == 0xff) || (cur[1] && vir[1] == 0xff) ||
            (cur[2] && vir[2] == 0xff) || (cur[3] && vir[3] == 0xff)) ret = 2;
        else ret = 1;

      }

      *virgin &= ~*current;

    }

    current++;
    virgin++;

  }

  if (ret && virgin_map == virgin_bits) bitmap_changed = 1;

  return ret;

}

write_to_testcase

把testcase内容写进target文件中

/* Write modified data to file for testing. If out_file is set, the old file
   is unlinked and a new one is created. Otherwise, out_fd is rewound and
   truncated. */

static void write_to_testcase(void* mem, u32 len) {

  s32 fd = out_fd;

  if (out_file) {

    unlink(out_file); /* Ignore errors. */

    fd = open(out_file, O_WRONLY | O_CREAT | O_EXCL, 0600);

    if (fd < 0) PFATAL("Unable to create '%s'", out_file);

  } else lseek(fd, 0, SEEK_SET);

  ck_write(fd, mem, len, out_file);

  if (!out_file) {

    if (ftruncate(fd, len)) PFATAL("ftruncate() failed");
    lseek(fd, 0, SEEK_SET);

  } else close(fd);

}

run_target

run_target会把trace_bits清空，但是注意之前已经保存过

if (q->exec_cksum) {

    memcpy(first_trace, trace_bits, MAP_SIZE);
    hnb = has_new_bits(virgin_bits);
    if (hnb > new_bits) new_bits = hnb;

  }

用dumb_mode跑,或者不用fork server，最后都是子进程在调用execv(target_path, argv); 父进程就waitpid(child_pid, &status, 0)
其他情况下

write(fsrv_ctl_fd, &prev_timed_out, 4) fuzzer向fork server发送命令
read(fsrv_st_fd, &child_pid, 4) fuzzer从fork server获取child_pid
read(fsrv_st_fd, &status, 4) fuzzer从fork server获取状态

可以看到这个和fork server紧密相关，再来复习一次和fork server相关内容。while(1)除非fork失败否则不会退出，也就是说这个fork server第一次启动后就一直在这个while循环中，然后通过read和write和fuzzer进行阻塞式的交流，然后每次交流后fork出子进程执行hit count的增加任务

 if ( write(199, &_afl_temp, 4uLL) == 4 )
      {
        while ( 1 )
        {
          v12 = 198;
          if ( read(198, &_afl_temp, 4uLL) != 4 )
            break;
          LODWORD(v13) = fork();
          if ( v13 < 0 )
            break;
          if ( !v13 )
            goto __afl_fork_resume;
          _afl_fork_pid = v13;
          write(199, &_afl_fork_pid, 4uLL);
          v12 = _afl_fork_pid;
          LODWORD(v14) = waitpid(_afl_fork_pid, &_afl_temp, 0);
          if ( v14 <= 0 )
            break;
          write(199, &_afl_temp, 4uLL);
        }
      }

/* Execute target application, monitoring for timeouts. Return status
   information. The called program will update trace_bits[]. */

static u8 run_target(char** argv, u32 timeout) {

  static struct itimerval it;
  static u32 prev_timed_out = 0;
  static u64 exec_ms = 0;

  int status = 0;
  u32 tb4;

  child_timed_out = 0;

  /* After this memset, trace_bits[] are effectively volatile, so we
     must prevent any earlier operations from venturing into that
     territory. */

  memset(trace_bits, 0, MAP_SIZE);
  MEM_BARRIER();

  /* If we're running in "dumb" mode, we can't rely on the fork server
     logic compiled into the target program, so we will just keep calling
     execve(). There is a bit of code duplication between here and 
     init_forkserver(), but c'est la vie. */


  //用dumb_mode跑,或者不用fork server
  if (dumb_mode == 1 || no_forkserver) {

    child_pid = fork();

    if (child_pid < 0) PFATAL("fork() failed");

    //****************************************子程序********************************************
    if (!child_pid) {

      struct rlimit r;

      if (mem_limit) {

        r.rlim_max = r.rlim_cur = ((rlim_t)mem_limit) << 20;

#ifdef RLIMIT_AS

        setrlimit(RLIMIT_AS, &r); /* Ignore errors */

#else

        setrlimit(RLIMIT_DATA, &r); /* Ignore errors */

#endif /* ^RLIMIT_AS */

      }

      r.rlim_max = r.rlim_cur = 0;

      setrlimit(RLIMIT_CORE, &r); /* Ignore errors */

      /* Isolate the process and configure standard descriptors. If out_file is
         specified, stdin is /dev/null; otherwise, out_fd is cloned instead. */

      setsid();

      dup2(dev_null_fd, 1);
      dup2(dev_null_fd, 2);

      if (out_file) {

        dup2(dev_null_fd, 0);

      } else {

        dup2(out_fd, 0);
        close(out_fd);

      }

      /* On Linux, would be faster to use O_CLOEXEC. Maybe TODO. */

      close(dev_null_fd);
      close(out_dir_fd);
      close(dev_urandom_fd);
      close(fileno(plot_file));

      /* Set sane defaults for ASAN if nothing else specified. */

      setenv("ASAN_OPTIONS", "abort_on_error=1:"
                             "detect_leaks=0:"
                             "symbolize=0:"
                             "allocator_may_return_null=1", 0);

      setenv("MSAN_OPTIONS", "exit_code=" STRINGIFY(MSAN_ERROR) ":"
                             "symbolize=0:"
                             "msan_track_origins=0", 0);
      //这个不和init_forkserver内容一致???
      execv(target_path, argv);

      /* Use a distinctive bitmap value to tell the parent about execv()
         falling through. */

      *(u32*)trace_bits = EXEC_FAIL_SIG;
      exit(0);

    }

  } else {

    s32 res;

    /* In non-dumb mode, we have the fork server up and running, so simply
       tell it to have at it, and then read back PID. */


    //向fork server发送命令
    if ((res = write(fsrv_ctl_fd, &prev_timed_out, 4)) != 4) {

      if (stop_soon) return 0;
      RPFATAL(res, "Unable to request new process from fork server (OOM?)");

    }

    //从fork server获取child_pid
    if ((res = read(fsrv_st_fd, &child_pid, 4)) != 4) {

      if (stop_soon) return 0;
      RPFATAL(res, "Unable to request new process from fork server (OOM?)");

    }

    if (child_pid <= 0) FATAL("Fork server is misbehaving (OOM?)");

  }

  /* Configure timeout, as requested by user, then wait for child to terminate. */

  it.it_value.tv_sec = (timeout / 1000);
  it.it_value.tv_usec = (timeout % 1000) * 1000;

  setitimer(ITIMER_REAL, &it, NULL);

  /* The SIGALRM handler simply kills the child_pid and sets child_timed_out. */

  if (dumb_mode == 1 || no_forkserver) {

    if (waitpid(child_pid, &status, 0) <= 0) PFATAL("waitpid() failed");

  } else {

    s32 res;

    if ((res = read(fsrv_st_fd, &status, 4)) != 4) {

      if (stop_soon) return 0;
      RPFATAL(res, "Unable to communicate with fork server (OOM?)");

    }

  }

  if (!WIFSTOPPED(status)) child_pid = 0;

  getitimer(ITIMER_REAL, &it);
  exec_ms = (u64) timeout - (it.it_value.tv_sec * 1000 +
                             it.it_value.tv_usec / 1000);

  it.it_value.tv_sec = 0;
  it.it_value.tv_usec = 0;

  setitimer(ITIMER_REAL, &it, NULL);

  total_execs++;

  /* Any subsequent operations on trace_bits must not be moved by the
     compiler below this point. Past this location, trace_bits[] behave
     very normally and do not have to be treated as volatile. */

  MEM_BARRIER();

  tb4 = *(u32*)trace_bits;

  classify_counts((u32*)trace_bits);

  prev_timed_out = child_timed_out;


  //根据子进程的执行情况返回合适的值
  /* Report outcome to caller. */

  if (WIFSIGNALED(status) && !stop_soon) {

    kill_signal = WTERMSIG(status);

    if (child_timed_out && kill_signal == SIGKILL) return FAULT_TMOUT;

    return FAULT_CRASH;

  }

  /* A somewhat nasty hack for MSAN, which doesn't support abort_on_error and
     must use a special exit code. */

  if (uses_asan && WEXITSTATUS(status) == MSAN_ERROR) {
    kill_signal = 0;
    return FAULT_CRASH;
  }

  if ((dumb_mode == 1 || no_forkserver) && tb4 == EXEC_FAIL_SIG)
    return FAULT_ERROR;

  /* It makes sense to account for the slowest units only if the testcase was run
  under the user defined timeout. */
  if (!(timeout > exec_tmout) && (slowest_exec_ms < exec_ms)) {
    slowest_exec_ms = exec_ms;
  }

  return FAULT_NONE;

}

classify_counts

将trace_bits中的内容映射到合适的桶中

static inline void classify_counts(u32* mem) {

  u32 i = MAP_SIZE >> 2;

  while (i--) {

    /* Optimize for sparse bitmaps. */

    if (unlikely(*mem)) {

      u16* mem16 = (u16*)mem;

      mem16[0] = count_class_lookup16[mem16[0]];
      mem16[1] = count_class_lookup16[mem16[1]];

    }

    mem++;

  }

}

count_bytes

看有多个少字节值不为0

#define FF(_b)  (0xff << ((_b) << 3))
/* Count the number of bytes set in the bitmap. Called fairly sporadically,
   mostly to update the status screen or calibrate and examine confirmed
   new paths. */
static u32 count_bytes(u8* mem) {

  u32* ptr = (u32*)mem;
  u32  i   = (MAP_SIZE >> 2);
  u32  ret = 0;

  while (i--) {

    u32 v = *(ptr++);

    if (!v) continue;
    if (v & FF(0)) ret++;
    if (v & FF(1)) ret++;
    if (v & FF(2)) ret++;
    if (v & FF(3)) ret++;

  }

  return ret;

}

hash32

用于计算cksum，具体细节不必深究

#define ROL32(_x, _r)  ((((u32)(_x)) << (_r)) | (((u32)(_x)) >> (32 - (_r))))

static inline u32 hash32(const void* key, u32 len, u32 seed) {

  const u32* data  = (u32*)key;
  u32 h1 = seed ^ len;

  len >>= 2;

  while (len--) {

    u32 k1 = *data++;

    k1 *= 0xcc9e2d51;
    k1  = ROL32(k1, 15);
    k1 *= 0x1b873593;

    h1 ^= k1;
    h1  = ROL32(h1, 13);
    h1  = h1 * 5 + 0xe6546b64;

  }

  h1 ^= h1 >> 16;
  h1 *= 0x85ebca6b;
  h1 ^= h1 >> 13;
  h1 *= 0xc2b2ae35;
  h1 ^= h1 >> 16;

  return h1;

}

update_bitmap_score

当 fuzz 进行到后期，可能一些用例的边覆盖度，是它们祖先的边覆盖度的严格超集，因此可以考虑缩小 corpus，专注于这些超级用例（当然，其他用例不是被彻底放弃了，而是被以很大的概率忽略掉）。因此，AFL 倾向于找个 queue 中用例的子集，使得它们在覆盖所有已知边的同时尽可能小。这样的用例被认为是 favored 的。AFL 作者声称，这样形成的 favored 集合，比整个 corpus 可以小 5 到 10 倍。

然而，子集覆盖问题是 NP-完全的。AFL 必须保证速度，所以它采用了一个不准确但是很高速的算法：对于 shm 的每一个位置（这代表一条边），记录 top_rated 指针，指向 queue 中覆盖了这条边的、分数最小的那个用例。一个用例的分数等于 exec_us * len，即执行时间与文件大小的乘积。

static void update_bitmap_score(struct queue_entry* q) {

  u32 i;
  // 计算case的fav_factor，计算方法是执行时间和样例大小的乘积
  u64 fav_factor = q->exec_us * q->len;

  /* For every byte set in trace_bits[], see if there is a previous winner,
     and how it compares to us. */
  // 遍历trace_bits数组
  for (i = 0; i < MAP_SIZE; i++)
    // 不为0，表示已经被覆盖到的路径
    if (trace_bits[i]) {
      // 检查top_rated是否存在
       if (top_rated[i]) {

         /* Faster-executing or smaller test cases are favored. */
        // 如果top_rated[i]的更小，则代表它的更优，不做处理，继续遍历下一个路径
         if (fav_factor > top_rated[i]->exec_us * top_rated[i]->len) continue;

         /* Looks like we're going to win. Decrease ref count for the
            previous winner, discard its trace_bits[] if necessary. */

         if (!--top_rated[i]->tc_ref) {
           ck_free(top_rated[i]->trace_mini);
           top_rated[i]->trace_mini = 0;
         }

       }

       /* Insert ourselves as the new winner. */
      // 设置为当前的testcase
       top_rated[i] = q;
       q->tc_ref++;

       if (!q->trace_mini) {
         q->trace_mini = ck_alloc(MAP_SIZE >> 3);
         minimize_bits(q->trace_mini, trace_bits);
       }

       score_changed = 1;

     }

}

minimize_bits

在程序分析和模糊测试中，经常使用位图（bitmap）来记录程序执行路径。位图中的每一位（bit）通常对应程序中某个特定的路径分支或代码块的执行情况。

完整位图 (trace_bits): 每一位对应某个路径，如果该位为 1，则表示程序在一次执行中触发了这个路径。压缩位图 (trace_mini): 用于压缩存储这些路径信息，目的是减少内存占用。压缩过程中，只记录哪些路径被触发过，而不记录具体的触发次数。

总的来说这一步就是进行路径压缩，以更小的存储形式存到q->trace_mini中

static void minimize_bits(u8* dst, u8* src) {

  u32 i = 0;

  while (i < MAP_SIZE) {

    if (*(src++)) dst[i >> 3] |= 1 << (i & 7);
    i++;

  }

}

mark_as_variable

variable就是一个testcase但是执行结果却不一样，那么就将其保存到out_dir/queue/.state/variable_behavior/中，同时更新queue中其var_behavior成员的值

/* Mark as variable. Create symlinks if possible to make it easier to examine
   the files. */

static void mark_as_variable(struct queue_entry* q) {

  u8 *fn = strrchr(q->fname, '/') + 1, *ldest;

  ldest = alloc_printf("../../%s", fn);
  fn = alloc_printf("%s/queue/.state/variable_behavior/%s", out_dir, fn);

  if (symlink(ldest, fn)) {

    s32 fd = open(fn, O_WRONLY | O_CREAT | O_EXCL, 0600);
    if (fd < 0) PFATAL("Unable to create '%s'", fn);
    close(fd);

  }

  ck_free(ldest);
  ck_free(fn);

  q->var_behavior = 1;

}

show_init_stats

ui部分

write_stats_file

更新 fuzzer_stats 文件

save_auto

保存 auto extras

/* Save automatically generated extras. */

static void save_auto(void) {

  u32 i;

  if (!auto_changed) return;
  auto_changed = 0;

  for (i = 0; i < MIN(USE_AUTO_EXTRAS, a_extras_cnt); i++) {

    u8* fn = alloc_printf("%s/queue/.state/auto_extras/auto_%06u", out_dir, i);
    s32 fd;

    fd = open(fn, O_WRONLY | O_CREAT | O_TRUNC, 0600);

    if (fd < 0) PFATAL("Unable to create '%s'", fn);

    ck_write(fd, a_extras[i].data, a_extras[i].len, fn);

    close(fd);
    ck_free(fn);

  }

}

find_start_position

若是恢复之前的 fuzz，则找到该从队列的什么位置继续

seek_to = find_start_position();
/* When resuming, try to find the queue position to start from. This makes sense
   only when resuming, and when we can find the original fuzzer_stats. */

static u32 find_start_position(void) {

  static u8 tmp[4096]; /* Ought to be enough for anybody. */

  u8  *fn, *off;
  s32 fd, i;
  u32 ret;

  if (!resuming_fuzz) return 0;

  if (in_place_resume) fn = alloc_printf("%s/fuzzer_stats", out_dir);
  else fn = alloc_printf("%s/../fuzzer_stats", in_dir);

  fd = open(fn, O_RDONLY);
  ck_free(fn);

  if (fd < 0) return 0;

  i = read(fd, tmp, sizeof(tmp) - 1); (void)i; /* Ignore errors */
  close(fd);

  off = strstr(tmp, "cur_path          : ");
  if (!off) return 0;

  ret = atoi(off + 20);
  if (ret >= queued_paths) ret = 0;
  return ret;

}