afl-fuzz源码分析上篇
[TOC]
前言
- 因为afl-fuzz.c源代码很长,因此这部分文章只打算整体过一遍main中fuzz前的准备工作相关部分
- 函数只分析和fuzz过程有关的一些函数,有些文件检查,路径检查,系统设置检查那种一般都略过不进行分析,这些不重要的就只在main中用注释的形式写出其作用功能
- 有些函数和笔者学习fuzz源码目的不相关,笔者更多是想要在把握fuzz流程的基础上,弄明白fuzz的结构,fuzz如何进行工作,如何变异,得到更合适的testcase,有些函数就算修改fuzz源码也不一定会涉及,所以只要知道功能就好了
- 本来想画思维导图的,但是觉得好麻烦就没画了。于是就按顺序分析main函数,查看函数定义,如果函数中还有其他函数,可以用##二级标题更清楚地看明白这个函数里面引用了哪些重要函数
- 笔者认为最重要的一些部分
全局变量
setup_shm
init_count_class16
read_testcases
全局变量
- 在阅读整个源码前,有必要先看看最前面定义的全局变量都是什么作用,可以根据注释,GPT,网上资料,这样有一个整体的把握。先明白这些大致会做什么,或者猜测它会做什么,实在难以通过命名明白变量的含义留个印象就行
- 同时一开始没懂的,粗略看一遍源码后再来看这些全局变量,想想这些在哪些函数出现,有什么作用,又会加深一遍印象
/* Paths and identifiers; most of them are filled in from the command line
   by main(). */
EXP_ST u8 *in_dir, /* Input directory with test cases (-i) */
*out_file, /* File to fuzz, if any (-f) */
*out_dir, /* Working & output directory (-o) */
*sync_dir, /* Synchronization directory */
*sync_id, /* Fuzzer ID: the string that follows -M or -S */
*use_banner, /* Display banner (-T) */
*in_bitmap, /* Input bitmap: path passed via -B */
*doc_path, /* Path to documentation dir */
*target_path, /* Path to target binary */
*orig_cmdline; /* Original command line */
EXP_ST u32 exec_tmout = EXEC_TIMEOUT; /* Configurable exec timeout (ms) */
static u32 hang_tmout = EXEC_TIMEOUT; /* Timeout used for hang det (ms) */
EXP_ST u64 mem_limit = MEM_LIMIT; /* Memory cap for child (MB) */
EXP_ST u32 cpu_to_bind = 0; /* id of free CPU core to bind */
static u32 stats_update_freq = 1; /* Stats update frequency (execs) */
/* Article note: the next four flags concern the mutation stages. */
EXP_ST u8 skip_deterministic, /* Skip deterministic stages? */
force_deterministic, /* Force deterministic stages? */
use_splicing, /* Recombine input files? */
dumb_mode, /* Run in non-instrumented mode? */
/* Article note: used by update_bitmap_score() (not part of this excerpt). */
score_changed, /* Scoring for favorites changed? */
kill_signal, /* Signal that killed the child */
/* Whether a previously interrupted fuzzing job is being resumed. */
resuming_fuzz, /* Resuming an older fuzzing job? */
timeout_given, /* Specific timeout given? */
cpu_to_bind_given, /* Specified cpu_to_bind given? */
not_on_tty, /* stdout is not a tty */
term_too_small, /* terminal dimensions too small */
uses_asan, /* Target uses ASAN? */
/* Whether the fork server is disabled (AFL_NO_FORKSRV in main()). */
no_forkserver, /* Disable forkserver? */
/* Whether crash exploration mode is active. main() handles -C by rejecting
   a second -C with FATAL("Multiple -C options not supported") and then
   storing FAULT_CRASH in crash_mode. */
crash_mode, /* Crash mode! Yeah! */
in_place_resume, /* Attempt in-place resume? (set when -i is "-") */
/* Related to the auto-generated dictionary; set to 1 by maybe_add_auto(). */
auto_changed, /* Auto-generated tokens changed? */
no_cpu_meter_red, /* Feng shui on the status screen */
no_arith, /* Skip most arithmetic ops */
/* Shuffle the input queue (AFL_SHUFFLE_QUEUE). */
shuffle_queue, /* Shuffle input queue? */
bitmap_changed = 1, /* Time to update bitmap? */
qemu_mode, /* Running in QEMU mode? */
skip_requested, /* Skip request, via SIGUSR1 */
run_over10m, /* Run time over 10 minutes? */
persistent_mode, /* Running in persistent mode? */
deferred_mode, /* Deferred forkserver mode? */
/* Article note: faster calibration presumably means fewer calibration
   runs - TODO confirm against calibrate_case(). */
fast_cal; /* Try to calibrate faster? */
/* ---- State tied to the fork server and the coverage bitmaps; important ---- */
static s32 out_fd, /* Persistent fd for out_file (the file being fuzzed, not the target binary); do not confuse with out_dir_fd */
dev_urandom_fd = -1, /* Persistent fd for /dev/urandom */
dev_null_fd = -1, /* Persistent fd for /dev/null */
fsrv_ctl_fd, /* Fork server control pipe (write) */
fsrv_st_fd; /* Fork server status pipe (read) */
static s32 forksrv_pid, /* PID of the fork server */
child_pid = -1, /* PID of the fuzzed program */
out_dir_fd = -1; /* FD of the lock file */
EXP_ST u8* trace_bits; /* SHM with instrumentation bitmap */
EXP_ST u8 virgin_bits[MAP_SIZE], /* Regions yet untouched by fuzzing */
virgin_tmout[MAP_SIZE], /* Bits we haven't seen in tmouts */
virgin_crash[MAP_SIZE]; /* Bits we haven't seen in crashes */
static u8 var_bytes[MAP_SIZE]; /* Bytes that appear to be variable */
static s32 shm_id; /* ID of the SHM region */
static volatile u8 stop_soon, /* Ctrl-C pressed? */
clear_screen = 1, /* Window resized? */
child_timed_out; /* Traced process timed out? */
/* ---- Queue bookkeeping and run statistics (article note: these matter for
        the input queue and for fuzz_one()); important ---- */
EXP_ST u32 queued_paths, /* Total number of queued testcases */
queued_variable, /* Testcases with variable behavior */
queued_at_start, /* Total number of initial inputs */
queued_discovered, /* Items discovered during this run */
queued_imported, /* Items imported via -S (parallel fuzzing sync) */
queued_favored, /* Paths deemed favorable */
queued_with_cov, /* Paths with new coverage bytes */
pending_not_fuzzed, /* Queued but not done yet */
pending_favored, /* Pending favored paths */
cur_skipped_paths, /* Abandoned inputs in cur cycle */
cur_depth, /* Current path depth */
max_depth, /* Max path depth */
useless_at_start, /* Number of useless starting paths */
var_byte_count, /* Bitmap bytes with var behavior (article note: result of count_bytes()) */
current_entry, /* Current queue entry ID */
havoc_div = 1; /* Cycle count divisor for havoc */
EXP_ST u64 total_crashes, /* Total number of crashes */
unique_crashes, /* Crashes with unique signatures */
total_tmouts, /* Total number of timeouts */
unique_tmouts, /* Timeouts with unique signatures */
unique_hangs, /* Hangs with unique signatures */
total_execs, /* Total execve() calls */
slowest_exec_ms, /* Slowest testcase non hang in ms */
start_time, /* Unix start time (ms) */
last_path_time, /* Time for most recent path (ms) */
last_crash_time, /* Time for most recent crash (ms) */
last_hang_time, /* Time for most recent hang (ms) */
last_crash_execs, /* Exec counter at last crash */
queue_cycle, /* Queue round counter (full passes over the queue) */
cycles_wo_finds, /* Cycles without any new paths */
trim_execs, /* Execs done to trim input files */
bytes_trim_in, /* Bytes coming into the trimmer */
bytes_trim_out, /* Bytes coming outa the trimmer */
blocks_eff_total, /* Blocks subject to effector maps */
blocks_eff_select; /* Blocks selected as fuzzable */
static u32 subseq_tmouts; /* Number of timeouts in a row */
/* ---- Current fuzzing stage ---- */
/* Article note, quoted from elsewhere in afl-fuzz.c, showing how the two
   syncing_* variables are used:
     sprintf(ret, "sync:%s,src:%06u", syncing_party, syncing_case); */
static u8 *stage_name = "init", /* Name of the current fuzz stage */
*stage_short, /* Short stage name */
*syncing_party; /* Currently syncing with... (name of the sync peer) */
static s32 stage_cur, stage_max; /* Stage progression */
static s32 splicing_with = -1; /* Splicing with which test case? */
static u32 master_id, master_max; /* Master instance job splitting */
static u32 syncing_case; /* Syncing with case #... */
static s32 stage_cur_byte, /* Byte offset of current stage op */
stage_cur_val; /* Value used for stage op */
static u8 stage_val_type; /* Value type (STAGE_VAL_*) */
static u64 stage_finds[32], /* Patterns found per fuzz stage */
stage_cycles[32]; /* Execs per fuzz stage */
static u32 rand_cnt; /* Random number counter */
static u64 total_cal_us, /* Total calibration time (us) */
total_cal_cycles; /* Total calibration cycles */
static u64 total_bitmap_size, /* Total bit count for all bitmaps */
total_bitmap_entries; /* Number of bitmaps counted */
static s32 cpu_core_count; /* CPU core count */
#ifdef HAVE_AFFINITY
static s32 cpu_aff = -1; /* Selected CPU core */
#endif /* HAVE_AFFINITY */
static FILE* plot_file; /* Gnuplot output file */
/* One node of the fuzzing queue: a test case file plus its per-entry
   metadata. Nodes are created and linked by add_to_queue(). */
struct queue_entry {
u8* fname; /* File name for the test case */
u32 len; /* Input length */
u8 cal_failed, /* Calibration failed? */
trim_done, /* Trimmed? */
was_fuzzed, /* Had any fuzzing done yet? */
passed_det, /* Deterministic stages passed? */
has_new_cov, /* Triggers new coverage? */
var_behavior, /* Variable behavior? */
favored, /* Currently favored? */
fs_redundant; /* Marked as redundant in the fs? */
u32 bitmap_size, /* Number of bits set in bitmap */
exec_cksum; /* Checksum of the execution trace (article note: a hash32() result) */
u64 exec_us, /* Execution time (us) */
handicap, /* Number of queue cycles behind */
depth; /* Path depth */
u8* trace_mini; /* Trace bytes, if kept (compressed form of the trace) */
u32 tc_ref; /* Trace bytes ref count */
/* Singly linked list pointers; next_100 is the skip pointer that
   add_to_queue() sets on every 100th entry. */
struct queue_entry *next, /* Next element, if any */
*next_100; /* 100 elements ahead */
};
/* Heads and cursors of the fuzzing queue, maintained by add_to_queue(). */
static struct queue_entry *queue, /* Fuzzing queue (linked list): first entry */
*queue_cur, /* Current offset within the queue */
*queue_top, /* Top of the list: most recently appended entry */
*q_prev100; /* Previous 100 marker */
static struct queue_entry*
top_rated[MAP_SIZE]; /* Top entries for bitmap bytes */
/* One dictionary token ("extra"); used for both the user-supplied and the
   auto-generated dictionaries. */
struct extra_data {
u8* data; /* Dictionary token data */
u32 len; /* Dictionary token length */
u32 hit_cnt; /* Use count in the corpus */
};
static struct extra_data* extras; /* Extra tokens to fuzz with */
static u32 extras_cnt; /* Total number of tokens read */
static struct extra_data* a_extras; /* Automatically selected extras */
static u32 a_extras_cnt; /* Total number of tokens available */
/* Optional post-processing hook; setup_post() points it at the
   afl_postprocess symbol of the library named by AFL_POST_LIBRARY. */
static u8* (*post_handler)(u8* buf, u32* len);
/* Interesting values, as per config.h */
/* Article note: these tables feed the deterministic mutation stages. Each
   wider table starts with the narrower tables' values. */
static s8 interesting_8[] = { INTERESTING_8 };
static s16 interesting_16[] = { INTERESTING_8, INTERESTING_16 };
static s32 interesting_32[] = { INTERESTING_8, INTERESTING_16, INTERESTING_32 };
/* Fuzzing stages */
//****************************变异所处的阶段*****************************
enum {
/* 00 */ STAGE_FLIP1,
/* 01 */ STAGE_FLIP2,
/* 02 */ STAGE_FLIP4,
/* 03 */ STAGE_FLIP8,
/* 04 */ STAGE_FLIP16,
/* 05 */ STAGE_FLIP32,
/* 06 */ STAGE_ARITH8,
/* 07 */ STAGE_ARITH16,
/* 08 */ STAGE_ARITH32,
/* 09 */ STAGE_INTEREST8,
/* 10 */ STAGE_INTEREST16,
/* 11 */ STAGE_INTEREST32,
/* 12 */ STAGE_EXTRAS_UO,
/* 13 */ STAGE_EXTRAS_UI,
/* 14 */ STAGE_EXTRAS_AO,
/* 15 */ STAGE_HAVOC,
/* 16 */ STAGE_SPLICE
};
/* Stage value types */
/* Values stored in stage_val_type. The LE/BE names presumably stand for
   little-/big-endian - TODO confirm at the use sites. */
enum {
/* 00 */ STAGE_VAL_NONE,
/* 01 */ STAGE_VAL_LE,
/* 02 */ STAGE_VAL_BE
};
/* Execution status fault codes */
//*****************************************run_target的返回值************************************
enum {
/* 00 */ FAULT_NONE,
/* 01 */ FAULT_TMOUT,
/* 02 */ FAULT_CRASH,
/* 03 */ FAULT_ERROR,
/* 04 */ FAULT_NOINST,
/* 05 */ FAULT_NOBITS
};
main中fuzz前的准备工作相关部分
- 先看main的主要流程,再看main中一些函数,有些函数对理解fuzz不太重要的就不做细致分析了
/* Main entry point */
/* This excerpt covers only the preparation done before fuzzing starts:
   command-line parsing, environment handling, and the setup calls that
   build the initial queue. The rest of main() is elided at the end. */
int main(int argc, char** argv) {
s32 opt;
u64 prev_queued = 0;
u32 sync_interval_cnt = 0, seek_to;
u8 *extras_dir = 0;
u8 mem_limit_given = 0;
u8 exit_1 = !!getenv("AFL_BENCH_JUST_ONE");
char** use_argv;
struct timeval tv;
struct timezone tz;
SAYF(cCYA "afl-fuzz " cBRI VERSION cRST " by <lcamtuf@google.com>\n");
doc_path = access(DOC_PATH, F_OK) ? "docs" : DOC_PATH;
/* Seed the PRNG from the current time and our PID. */
gettimeofday(&tv, &tz);
srandom(tv.tv_sec ^ tv.tv_usec ^ getpid());
/* Parse the command-line options (article note: much like afl-gcc does). */
while ((opt = getopt(argc, argv, "+i:o:f:m:b:t:T:dnCB:S:M:x:QV")) > 0)
switch (opt) {
/* Directory holding the input corpus; "-" requests an in-place resume. */
case 'i': /* input dir */
if (in_dir) FATAL("Multiple -i options not supported");
in_dir = optarg;
if (!strcmp(in_dir, "-")) in_place_resume = 1;
break;
/* Directory that receives the results. */
case 'o': /* output dir */
if (out_dir) FATAL("Multiple -o options not supported");
out_dir = optarg;
break;
/* Upper-case M: run as the master instance; deterministic stages are
   forced on. An optional ":id/max" suffix fills master_id / master_max. */
case 'M': { /* master sync ID */
u8* c;
if (sync_id) FATAL("Multiple -S or -M options not supported");
sync_id = ck_strdup(optarg);
if ((c = strchr(sync_id, ':'))) {
*c = 0;
if (sscanf(c + 1, "%u/%u", &master_id, &master_max) != 2 ||
!master_id || !master_max || master_id > master_max ||
master_max > 1000000) FATAL("Bogus master ID passed to -M");
}
force_deterministic = 1;
}
break;
/* Run as a slave instance; fix_up_sync() later turns the deterministic
   stages off for it. */
case 'S':
if (sync_id) FATAL("Multiple -S or -M options not supported");
sync_id = ck_strdup(optarg);
break;
/* The file to be fuzzed. */
case 'f': /* target file */
if (out_file) FATAL("Multiple -f options not supported");
out_file = optarg;
break;
/* Dictionary location, if the user has one to give to AFL. */
case 'x': /* dictionary */
if (extras_dir) FATAL("Multiple -x options not supported");
extras_dir = optarg;
break;
/* Sets exec_tmout; a trailing '+' is recorded as timeout_given == 2. */
case 't': { /* timeout */
u8 suffix = 0;
if (timeout_given) FATAL("Multiple -t options not supported");
if (sscanf(optarg, "%u%c", &exec_tmout, &suffix) < 1 ||
optarg[0] == '-') FATAL("Bad syntax used for -t");
if (exec_tmout < 5) FATAL("Dangerously low value of -t");
if (suffix == '+') timeout_given = 2; else timeout_given = 1;
break;
}
/* Lower-case m: memory limit. The value is normalised to megabytes
   according to the optional T/G/k/M suffix; "none" stores 0. */
case 'm': { /* mem limit */
u8 suffix = 'M';
if (mem_limit_given) FATAL("Multiple -m options not supported");
mem_limit_given = 1;
if (!strcmp(optarg, "none")) {
mem_limit = 0;
break;
}
if (sscanf(optarg, "%llu%c", &mem_limit, &suffix) < 1 ||
optarg[0] == '-') FATAL("Bad syntax used for -m");
switch (suffix) {
case 'T': mem_limit *= 1024 * 1024; break;
case 'G': mem_limit *= 1024; break;
case 'k': mem_limit /= 1024; break;
case 'M': break;
default: FATAL("Unsupported suffix or bad syntax for -m");
}
if (mem_limit < 5) FATAL("Dangerously low value of -m");
if (sizeof(rlim_t) == 4 && mem_limit > 2000)
FATAL("Value of -m out of range on 32-bit systems");
}
break;
case 'b': { /* bind CPU core */
if (cpu_to_bind_given) FATAL("Multiple -b options not supported");
cpu_to_bind_given = 1;
if (sscanf(optarg, "%u", &cpu_to_bind) < 1 ||
optarg[0] == '-') FATAL("Bad syntax used for -b");
break;
}
/* -d skips the deterministic stages and enables splicing. */
case 'd': /* skip deterministic */
if (skip_deterministic) FATAL("Multiple -d options not supported");
skip_deterministic = 1;
use_splicing = 1;
break;
/* Article note: -B loads a bitmap saved by an earlier run so that one
   interesting test case can be mutated without rediscovering old paths.
   The AFL author says below that he used it only once or twice, so the
   article does not explore it further. */
case 'B': /* load bitmap */
/* This is a secret undocumented option! It is useful if you find
an interesting test case during a normal fuzzing process, and want
to mutate it without rediscovering any of the test cases already
found during an earlier run.
To use this mode, you need to point -B to the fuzz_bitmap produced
by an earlier run for the exact same binary... and that's it.
I only used this once or twice to get variants of a particular
file, so I'm not making this an official setting. */
if (in_bitmap) FATAL("Multiple -B options not supported");
in_bitmap = optarg;
read_bitmap(in_bitmap);
break;
/* Turns on crash exploration mode. Article note (citing the AFL
   whitepaper): given a crashing input, the fuzzer generates many more
   crashes to explore what that crash can do. */
case 'C': /* crash mode */
if (crash_mode) FATAL("Multiple -C options not supported");
crash_mode = FAULT_CRASH;
break;
/* Turns on dumb (black-box) mode, i.e. running without instrumentation.
   dumb_mode is 2 when AFL_DUMB_FORKSRV is set and 1 otherwise. Article
   note: with the value 1 the fork server is not used either. */
case 'n': /* dumb mode */
if (dumb_mode) FATAL("Multiple -n options not supported");
if (getenv("AFL_DUMB_FORKSRV")) dumb_mode = 2; else dumb_mode = 1;
break;
case 'T': /* banner */
if (use_banner) FATAL("Multiple -T options not supported");
use_banner = optarg;
break;
case 'Q': /* QEMU mode */
if (qemu_mode) FATAL("Multiple -Q options not supported");
qemu_mode = 1;
if (!mem_limit_given) mem_limit = MEM_LIMIT_QEMU;
break;
case 'V': /* Show version number */
/* Version number has been printed already, just quit. */
exit(0);
default:
usage(argv[0]);
}
if (optind == argc || !in_dir || !out_dir) usage(argv[0]);
/* Install the signal handlers. */
setup_signal_handlers();
/* Validate the ASAN-related options. */
check_asan_opts();
/* Validate sync_id and derive sync_dir / out_dir from it. */
if (sync_id) fix_up_sync();
if (!strcmp(in_dir, out_dir))
FATAL("Input and output directories can't be the same");
if (dumb_mode) {
if (crash_mode) FATAL("-C and -n are mutually exclusive");
if (qemu_mode) FATAL("-Q and -n are mutually exclusive");
}
/* Settings taken from environment variables. */
if (getenv("AFL_NO_FORKSRV")) no_forkserver = 1;
if (getenv("AFL_NO_CPU_RED")) no_cpu_meter_red = 1;
if (getenv("AFL_NO_ARITH")) no_arith = 1;
if (getenv("AFL_SHUFFLE_QUEUE")) shuffle_queue = 1;
if (getenv("AFL_FAST_CAL")) fast_cal = 1;
if (getenv("AFL_HANG_TMOUT")) {
hang_tmout = atoi(getenv("AFL_HANG_TMOUT"));
if (!hang_tmout) FATAL("Invalid value of AFL_HANG_TMOUT");
}
if (dumb_mode == 2 && no_forkserver)
FATAL("AFL_DUMB_FORKSRV and AFL_NO_FORKSRV are mutually exclusive");
if (getenv("AFL_PRELOAD")) {
setenv("LD_PRELOAD", getenv("AFL_PRELOAD"), 1);
setenv("DYLD_INSERT_LIBRARIES", getenv("AFL_PRELOAD"), 1);
}
if (getenv("AFL_LD_PRELOAD"))
FATAL("Use AFL_PRELOAD instead of AFL_LD_PRELOAD");
/* Article note: stores the fuzzer's own command line in orig_cmdline. */
save_cmdline(argc, argv);
/* Article note: builds the banner shown in the UI. */
fix_up_banner(argv[optind]);
/* Article note: sets not_on_tty = 1 when AFL_NO_UI is present. */
check_if_tty();
/* CPU related. */
get_core_count();
#ifdef HAVE_AFFINITY
bind_to_free_cpu();
#endif /* HAVE_AFFINITY */
/* Article note: if crash reports are piped to an external program, the
   added delay can make a crash look like a timeout. This check inspects
   /proc/sys/kernel/core_pattern, which is why the usual first step is
   `echo core > /proc/sys/kernel/core_pattern`. */
check_crash_handling();
/* Article note: if CPU frequency scaling is detected, asks the user to pin
   the CPU at its highest frequency. */
check_cpu_governor();
/* If AFL_POST_LIBRARY is set, point post_handler at the library's
   afl_postprocess function. */
setup_post();
/* Set up the shared memory region and the virgin maps. */
setup_shm();
/* Build the 16-bit classification lookup table. */
init_count_class16();
/* Create the working directories and open some descriptors for later use,
   e.g. /dev/urandom. */
setup_dirs_fds();
/* Read the initial corpus into the queue. */
read_testcases();
/* Load previously auto-generated dictionary tokens, if any. */
load_auto();
/* Article note: copies the initial corpus into out_dir's queue directory. */
pivot_inputs();
/* If a dictionary was given with -x, load the extras from it. */
if (extras_dir) load_extras(extras_dir);
if (!timeout_given) find_timeout();
detect_file_args(argv + optind + 1);
/* Taken only when no -f file was given. */
if (!out_file) setup_stdio_file();
/* Article note: checks that the target binary can be found and accessed. */
check_binary(argv[optind]);
start_time = get_cur_time();
if (qemu_mode)
use_argv = get_qemu_argv(argv[0], argv + optind, argc - optind);
else
use_argv = argv + optind;
/*
... (the remainder of main() is omitted from this excerpt) ...
*/
}
setup_signal_handlers
- 信号处理函数,设置各种信号句柄
check_asan_opts
- 读取环境变量 ASAN_OPTIONS 和 MSAN_OPTIONS
save_cmdline
- 备份命令行指令cmdline数据
fix_up_banner
- ui相关
check_if_tty
- 若有环境变量 AFL_NO_UI,则 not_on_tty = 1
get_core_count
- cpu 相关
check_crash_handling
- 如果 crash 掉的进程的崩溃报告会被发给某个程序,那么会引入延迟,于是 crash 可能会被误认为是超时
- 检查系统配置 /proc/sys/kernel/core_pattern,也就是一开始就要echo core > /proc/sys/kernel/core_pattern
check_cpu_governor
- 若发现 cpu 频率可调,让用户把 cpu 定在最高频率
fix_up_sync
- 检查sync_id是否和要求的形式一样,然后将 sync_dir = out_dir ,out_dir = out_dir/sync_id
- 但笔者自己玩fuzz的时候还没太懂-S和-M是干啥的,而且对并行理解只有一点点,所以这里就只是看着函数知道在干嘛,对后面并行的部分也不是很懂
/* Validate the fuzzer ID given via -S / -M and rearrange the output paths
   for parallel fuzzing: the original out_dir becomes sync_dir, and out_dir
   becomes "<out_dir>/<sync_id>". For -S instances (force_deterministic not
   set) the deterministic stages are skipped and splicing is enabled.
   Every validation failure is reported through FATAL(). */

static void fix_up_sync(void) {

  const u8* id_ch;
  u8*       instance_dir;

  if (dumb_mode)
    FATAL("-S / -M and -n are mutually exclusive");

  /* -d makes no sense together with either sync option. */

  if (skip_deterministic && force_deterministic)
    FATAL("use -S instead of -M -d");

  if (skip_deterministic && !force_deterministic)
    FATAL("-S already implies -d");

  /* Only letters, digits, '_' and '-' are accepted in the ID. */

  for (id_ch = sync_id; *id_ch; id_ch++) {

    u8 allowed = isalnum(*id_ch) || *id_ch == '_' || *id_ch == '-';

    if (!allowed)
      FATAL("Non-alphanumeric fuzzer ID specified via -S or -M");

  }

  if (strlen(sync_id) > 32) FATAL("Fuzzer ID too long");

  /* Build the per-instance path before out_dir is repurposed. */

  instance_dir = alloc_printf("%s/%s", out_dir, sync_id);

  sync_dir = out_dir;
  out_dir  = instance_dir;

  if (force_deterministic) return;

  skip_deterministic = 1;
  use_splicing       = 1;

}
setup_post
- 根据环境AFL_POST_LIBRARY找到共享库文件路径并加载,解析符号找到afl_postprocess函数并调用
- 自己的理解就是这和常见的libc.so.6很类似,支持用户自定义一些操作,这样就可以先不用修改fuzz相关源码而是直接加载这个lib,但是目前应该用不太到
static u8* (*post_handler)(u8* buf, u32* len);

/* Load the optional postprocessor. When AFL_POST_LIBRARY is set, the named
   shared object is opened with dlopen(), its afl_postprocess symbol is
   stored in post_handler, and the handler is invoked once as a smoke test.
   Does nothing when the variable is unset; load or lookup failures end in
   FATAL(). */

static void setup_post(void) {

  u8*   lib_path  = getenv("AFL_POST_LIBRARY");
  void* lib;
  u32   probe_len = 6;

  if (!lib_path) return;

  ACTF("Loading postprocessor from '%s'...", lib_path);

  /* Resolve every symbol right away so that a broken library fails here. */

  lib = dlopen(lib_path, RTLD_NOW);
  if (!lib) FATAL("%s", dlerror());

  post_handler = dlsym(lib, "afl_postprocess");
  if (!post_handler) FATAL("Symbol 'afl_postprocess' not found.");

  /* Trial call: if the handler is going to crash, better now than later. */

  post_handler("hello", &probe_len);

  OKF("Postprocessor installed successfully.");

}
setup_shm
/* Startup-time setup of the coverage state: fills the virgin maps with 0xff
   bytes, creates a private SysV shared memory segment of MAP_SIZE bytes,
   registers remove_shm() to run at exit, exports the segment ID through
   SHM_ENV_VAR (unless in dumb mode) and attaches the segment as
   trace_bits. */

EXP_ST void setup_shm(void) {

  u8* id_text;

  /* virgin_bits is left untouched when -B supplied a bitmap. */

  if (!in_bitmap) memset(virgin_bits, 255, MAP_SIZE);

  memset(virgin_tmout, 255, MAP_SIZE);
  memset(virgin_crash, 255, MAP_SIZE);

  shm_id = shmget(IPC_PRIVATE, MAP_SIZE, IPC_CREAT | IPC_EXCL | 0600);

  if (shm_id < 0) PFATAL("shmget() failed");

  atexit(remove_shm);

  /* In dumb mode the variable is deliberately not exported: an instrumented
     target must not detect instrumentation, since no fork server commands
     will be sent to it. */

  id_text = alloc_printf("%d", shm_id);

  if (!dumb_mode) setenv(SHM_ENV_VAR, id_text, 1);

  ck_free(id_text);

  trace_bits = shmat(shm_id, NULL, 0);

  if (trace_bits == (void *)-1) PFATAL("shmat() failed");

}
init_count_class16
- init_count_class16 本身只负责根据 count_class_lookup8 预先生成 16 位的查找表 count_class_lookup16;之后在把 hit count 替换为桶 id 时,就可以一次读取两个字节来进行处理,这样可以提升效率
/* Bucketing table for a single hit-count byte: every raw count 0..255 maps
   to the value that represents its bucket. */

static const u8 count_class_lookup8[256] = {

  [0]           = 0,
  [1]           = 1,
  [2]           = 2,
  [3]           = 4,
  [4 ... 7]     = 8,
  [8 ... 15]    = 16,
  [16 ... 31]   = 32,
  [32 ... 127]  = 64,
  [128 ... 255] = 128

};

/* The same mapping for two adjacent bytes at once, indexed by the 16-bit
   value those bytes form. Filled in by init_count_class16(). */

static u16 count_class_lookup16[65536];

/* Precompute count_class_lookup16[] from count_class_lookup8[]: the upper
   and the lower byte of each index are bucketed independently and put back
   together. */

EXP_ST void init_count_class16(void) {

  u32 pair;

  for (pair = 0; pair < 65536; pair++) {

    u32 hi = pair >> 8;
    u32 lo = pair & 0xff;

    count_class_lookup16[pair] =
      (count_class_lookup8[hi] << 8) | count_class_lookup8[lo];

  }

}
setup_dirs_fds
- 这里主要都是创建一些文件夹和获得fd,具体这些文件夹存什么东西在使用fuzz中会更加清楚,现在不用管这些细节
/* Prepare output directories and fds. */
EXP_ST void setup_dirs_fds(void) {
u8* tmp;
s32 fd;
ACTF("Setting up output directories...");
//sync_id不为空则创建sync_idr
if (sync_id && mkdir(sync_dir, 0700) && errno != EEXIST)
PFATAL("Unable to create '%s'", sync_dir);
if (mkdir(out_dir, 0700)) {
if (errno != EEXIST) PFATAL("Unable to create '%s'", out_dir);
maybe_delete_out_dir();
} else {
if (in_place_resume)
FATAL("Resume attempted but old output directory not found");
out_dir_fd = open(out_dir, O_RDONLY);
#ifndef __sun
if (out_dir_fd < 0 || flock(out_dir_fd, LOCK_EX | LOCK_NB))
PFATAL("Unable to flock() output directory.");
#endif /* !__sun */
}
/* Queue directory for any starting & discovered paths. */
tmp = alloc_printf("%s/queue", out_dir);
if (mkdir(tmp, 0700)) PFATAL("Unable to create '%s'", tmp);
ck_free(tmp);
/* Top-level directory for queue metadata used for session
resume and related tasks. */
tmp = alloc_printf("%s/queue/.state/", out_dir);
if (mkdir(tmp, 0700)) PFATAL("Unable to create '%s'", tmp);
ck_free(tmp);
/* Directory for flagging queue entries that went through
deterministic fuzzing in the past. */
tmp = alloc_printf("%s/queue/.state/deterministic_done/", out_dir);
if (mkdir(tmp, 0700)) PFATAL("Unable to create '%s'", tmp);
ck_free(tmp);
/* Directory with the auto-selected dictionary entries. */
tmp = alloc_printf("%s/queue/.state/auto_extras/", out_dir);
if (mkdir(tmp, 0700)) PFATAL("Unable to create '%s'", tmp);
ck_free(tmp);
/* The set of paths currently deemed redundant. */
tmp = alloc_printf("%s/queue/.state/redundant_edges/", out_dir);
if (mkdir(tmp, 0700)) PFATAL("Unable to create '%s'", tmp);
ck_free(tmp);
/* The set of paths showing variable behavior. */
tmp = alloc_printf("%s/queue/.state/variable_behavior/", out_dir);
if (mkdir(tmp, 0700)) PFATAL("Unable to create '%s'", tmp);
ck_free(tmp);
/* Sync directory for keeping track of cooperating fuzzers. */
if (sync_id) {
tmp = alloc_printf("%s/.synced/", out_dir);
if (mkdir(tmp, 0700) && (!in_place_resume || errno != EEXIST))
PFATAL("Unable to create '%s'", tmp);
ck_free(tmp);
}
/* All recorded crashes. */
tmp = alloc_printf("%s/crashes", out_dir);
if (mkdir(tmp, 0700)) PFATAL("Unable to create '%s'", tmp);
ck_free(tmp);
/* All recorded hangs. */
tmp = alloc_printf("%s/hangs", out_dir);
if (mkdir(tmp, 0700)) PFATAL("Unable to create '%s'", tmp);
ck_free(tmp);
/* Generally useful file descriptors. */
dev_null_fd = open("/dev/null", O_RDWR);
if (dev_null_fd < 0) PFATAL("Unable to open /dev/null");
dev_urandom_fd = open("/dev/urandom", O_RDONLY);
if (dev_urandom_fd < 0) PFATAL("Unable to open /dev/urandom");
/* Gnuplot output file. */
tmp = alloc_printf("%s/plot_data", out_dir);
fd = open(tmp, O_WRONLY | O_CREAT | O_EXCL, 0600);
if (fd < 0) PFATAL("Unable to create '%s'", tmp);
ck_free(tmp);
plot_file = fdopen(fd, "w");
if (!plot_file) PFATAL("fdopen() failed");
fprintf(plot_file, "# unix_time, cycles_done, cur_path, paths_total, "
"pending_total, pending_favs, map_size, unique_crashes, "
"unique_hangs, max_depth, execs_per_sec\n");
/* ignore errors */
}
read_testcases
- 首先用 access() 检查 in_dir/queue 是否存在;若存在,就把 in_dir 替换为 in_dir/queue(用来自动识别非 in-place 的恢复),这里并不会打开任何 fd
- 扫描 in_dir,对文件夹下的文件进行检查,根据in_dir/.state/deterministic_done/nl[i]->d_name是否存在来判断其passed_det值
- 调用 add_to_queue(fn, st.st_size, passed_det) 添加到 queue 中
- 设置 last_path_time 为 0;设置 queued_at_start = queued_paths
/* Read all testcases from the input directory, then queue them for testing.
   Called at startup. Each non-empty regular file (other than
   README.testcases) is passed to add_to_queue(); aborts when the directory
   cannot be scanned, a file is unreadable or larger than MAX_FILE, or
   nothing usable is found. */

static void read_testcases(void) {

  struct dirent** entries;
  s32 entry_cnt;
  u32 idx;
  u8* queue_dir;

  /* Auto-detect non-in-place resumption attempts: if the input directory
     contains a queue/ subdirectory, read the test cases from there. */

  queue_dir = alloc_printf("%s/queue", in_dir);

  if (access(queue_dir, F_OK)) ck_free(queue_dir);
  else in_dir = queue_dir;

  ACTF("Scanning '%s'...", in_dir);

  /* We use scandir() + alphasort() rather than readdir() because otherwise,
     the ordering of test cases would vary somewhat randomly and would be
     difficult to control. */

  entry_cnt = scandir(in_dir, &entries, NULL, alphasort);

  if (entry_cnt < 0) {

    if (errno == ENOENT || errno == ENOTDIR)

      SAYF("\n" cLRD "[-] " cRST
           "The input directory does not seem to be valid - try again. The fuzzer needs\n"
           "    one or more test case to start with - ideally, a small file under 1 kB\n"
           "    or so. The cases must be stored as regular files directly in the input\n"
           "    directory.\n");

    PFATAL("Unable to open '%s'", in_dir);

  }

  /* Optionally randomize the order in which the cases get queued. */

  if (shuffle_queue && entry_cnt > 1) {

    ACTF("Shuffling queue...");
    shuffle_ptrs((void**)entries, entry_cnt);

  }

  for (idx = 0; idx < entry_cnt; idx++) {

    struct stat st;

    u8* case_path  = alloc_printf("%s/%s", in_dir, entries[idx]->d_name);
    u8* det_marker = alloc_printf("%s/.state/deterministic_done/%s", in_dir,
                                  entries[idx]->d_name);
    u8  passed_det;

    free(entries[idx]); /* not tracked */

    if (lstat(case_path, &st) || access(case_path, R_OK))
      PFATAL("Unable to access '%s'", case_path);

    /* This also takes care of . and .. */

    if (!S_ISREG(st.st_mode) || !st.st_size ||
        strstr(case_path, "/README.testcases")) {

      ck_free(case_path);
      ck_free(det_marker);
      continue;

    }

    if (st.st_size > MAX_FILE)
      FATAL("Test case '%s' is too big (%s, limit is %s)", case_path,
            DMS(st.st_size), DMS(MAX_FILE));

    /* Check for metadata that indicates that deterministic fuzzing
       is complete for this entry. We don't want to repeat deterministic
       fuzzing when resuming aborted scans, because it would be pointless
       and probably very time-consuming. */

    passed_det = !access(det_marker, F_OK);
    ck_free(det_marker);

    /* case_path is handed over to the queue and is not freed here. */

    add_to_queue(case_path, st.st_size, passed_det);

  }

  free(entries); /* not tracked */

  if (!queued_paths) {

    SAYF("\n" cLRD "[-] " cRST
         "Looks like there are no valid test cases in the input directory! The fuzzer\n"
         "    needs one or more test case to start with - ideally, a small file under\n"
         "    1 kB or so. The cases must be stored as regular files directly in the\n"
         "    input directory.\n");

    FATAL("No usable test cases in '%s'", in_dir);

  }

  /* add_to_queue() stamped last_path_time; reset it for the initial set. */

  last_path_time  = 0;
  queued_at_start = queued_paths;

}
shuffle_ptrs
- 就是随机打乱指针数组,从而打乱testcase原本的顺序
/* Shuffle an array of pointers in place (Fisher-Yates). Might be slightly
   biased, depending on how UR() derives its value.

   ptrs - array to permute
   cnt  - number of elements; 0 and 1 are accepted and leave the array as is

   The loop bound is written as `i + 1 < cnt` on purpose. The earlier
   `i < cnt - 2` wrapped around for cnt < 2 because cnt is unsigned, which
   led to out-of-bounds swaps and a UR(0) call. It also stopped one step
   early, so a two-element array was never shuffled and the second-to-last
   slot never took part as a swap origin. */

static void shuffle_ptrs(void** ptrs, u32 cnt) {

  u32 i;

  for (i = 0; i + 1 < cnt; i++) {

    /* Pick the element for slot i from the not-yet-fixed tail [i, cnt). */

    u32 j = i + UR(cnt - i);
    void *s = ptrs[i];
    ptrs[i] = ptrs[j];
    ptrs[j] = s;

  }

}
add_to_queue
- 将fname,file_size,testcase的depth,passed_det添加到queue中,并更新queue的相关指针
- 让queued_paths++;pending_not_fuzzed++;更新 last_path_time = get_cur_time();
/* Append new test case to the queue.

   fname      - path of the test case file; stored in the entry as is
   len        - length of the test case
   passed_det - non-zero if the deterministic stages were already done

   Updates the list head/tail pointers, the every-100th skip pointers, the
   queue counters, max_depth and last_path_time. */

static void add_to_queue(u8* fname, u32 len, u8 passed_det) {

  struct queue_entry* entry = ck_alloc(sizeof(struct queue_entry));

  entry->fname      = fname;
  entry->len        = len;
  entry->passed_det = passed_det;
  entry->depth      = cur_depth + 1;

  if (max_depth < entry->depth) max_depth = entry->depth;

  /* Link the entry at the tail; the very first entry also becomes the head
     of the list and the initial next_100 anchor. */

  if (!queue_top) {

    queue     = entry;
    q_prev100 = entry;

  } else {

    queue_top->next = entry;

  }

  queue_top = entry;

  queued_paths++;
  pending_not_fuzzed++;

  cycles_wo_finds = 0;

  /* Set next_100 pointer for every 100th element (index 0, 100, etc) to
     allow faster iteration. */

  if (queued_paths > 1 && (queued_paths - 1) % 100 == 0) {

    q_prev100->next_100 = entry;
    q_prev100           = entry;

  }

  last_path_time = get_cur_time();

}
load_auto
- 加载生成的提取出来的字典的token
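- 但我有点没太理解in_dir/.state/auto_extras/是怎么创建的,看下面的代码也只发现一开始setup_dirs_fds函数也只是创建了out_dir/queue/.state/auto_extras/,可能是要自己在in_dir添加这个???,等到进一步学习再解决这个疑惑(补充:read_testcases 开头会检查 in_dir/queue 是否存在,存在时把 in_dir 改成 in_dir/queue;因此当 -i 指向上一次运行的输出目录时,这里的 in_dir/.state/auto_extras/ 其实就是上一次运行时 setup_dirs_fds 创建的 out_dir/queue/.state/auto_extras/,并不需要手动创建;如果是全新的输入目录,这个路径不存在,open 返回 ENOENT,循环直接 break)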
- 主要是有extradictionary和autodictionary两种字典,extradictionary是用户自己引入的,对应extras结构体数组,extras_cnt记录数量,而autodictionary对应a_extras结构体数组,a_extras_cnt用来计数
/* Excerpt repeated from setup_dirs_fds(): creates the auto_extras directory
   under out_dir/queue/.state/. */
tmp = alloc_printf("%s/queue/.state/auto_extras/", out_dir);
if (mkdir(tmp, 0700)) PFATAL("Unable to create '%s'", tmp);
ck_free(tmp);
/* Load automatically generated extras. */

/* Number of auto_NNNNNN files probed, and the longest token accepted. */

#define USE_AUTO_EXTRAS 50
#define MAX_AUTO_EXTRA 32

/* Walk in_dir/.state/auto_extras/auto_000000, auto_000001, ... until a file
   is missing or USE_AUTO_EXTRAS files were read, handing every token of an
   acceptable length to maybe_add_auto(). The final message reports how many
   files were opened. */

static void load_auto(void) {

  u32 slot = 0;

  while (slot < USE_AUTO_EXTRAS) {

    u8  token[MAX_AUTO_EXTRA + 1];
    s32 fd, n_read;
    u8* path = alloc_printf("%s/.state/auto_extras/auto_%06u", in_dir, slot);

    fd = open(path, O_RDONLY, 0600);

    if (fd < 0) {

      /* A missing file simply marks the end of the series. */

      if (errno != ENOENT) PFATAL("Unable to open '%s'", path);
      ck_free(path);
      break;

    }

    /* We read one byte more to cheaply detect tokens that are too
       long (and skip them). */

    n_read = read(fd, token, MAX_AUTO_EXTRA + 1);

    if (n_read < 0) PFATAL("Unable to read from '%s'", path);

    if (n_read >= MIN_AUTO_EXTRA && n_read <= MAX_AUTO_EXTRA)
      maybe_add_auto(token, n_read);

    close(fd);
    ck_free(path);

    slot++;

  }

  if (slot) OKF("Loaded %u auto-discovered dictionary tokens.", slot);
  else OKF("No auto-generated dictionary tokens to reuse.");

}
maybe_add_auto
- 主要是判断这个auto字典是否和已经定义的interesting字典,以及用户自己引入的extradictionary是否重复,不重复就加到a_extras中
/* Helper function for maybe_add_auto(): case-insensitive comparison of two
   buffers of `len` bytes. Returns 0 when they match, 1 otherwise. */

static inline u8 memcmp_nocase(u8* m1, u8* m2, u32 len) {

  u32 pos;

  for (pos = 0; pos < len; pos++)
    if (tolower(m1[pos]) != tolower(m2[pos])) return 1;

  return 0;

}
/* Maybe add automatic extra. */
/* Considers the token mem[0..len) for the auto-generated dictionary
   a_extras[]. The token is dropped if it is a run of one repeated byte, if
   it equals a built-in interesting 16/32-bit value in either byte order, or
   if it matches a user-supplied extra (case-insensitively). A token already
   in a_extras[] only gets its hit_cnt bumped; a new one is appended, or
   replaces a random entry once the table is full. The table is re-sorted
   afterwards. */
static void maybe_add_auto(u8* mem, u32 len) {
u32 i;
/* Allow users to specify that they don't want auto dictionaries. */
/* Both limits are compile-time constants; a zero disables the feature. */
if (!MAX_AUTO_EXTRAS || !USE_AUTO_EXTRAS) return;
/* Skip runs of identical bytes. */
/* A token whose bytes are all the same is not added. */
for (i = 1; i < len; i++)
if (mem[0] ^ mem[i]) break;
if (i == len) return;
/* Reject builtin interesting values. */
/* Both byte orders are compared. NOTE(review): mem is read through
   u16* / u32* casts, which assumes such (possibly unaligned) accesses are
   acceptable on the target platform - confirm. */
if (len == 2) {
i = sizeof(interesting_16) >> 1;
while (i--)
if (*((u16*)mem) == interesting_16[i] ||
*((u16*)mem) == SWAP16(interesting_16[i])) return;
}
if (len == 4) {
i = sizeof(interesting_32) >> 2;
while (i--)
if (*((u32*)mem) == interesting_32[i] ||
*((u32*)mem) == SWAP32(interesting_32[i])) return;
}
/* Reject anything that matches existing extras. Do a case-insensitive
match. We optimize by exploiting the fact that extras[] are sorted
by size. */
for (i = 0; i < extras_cnt; i++)
if (extras[i].len >= len) break;
for (; i < extras_cnt && extras[i].len == len; i++)
if (!memcmp_nocase(extras[i].data, mem, len)) return;
/* Last but not least, check a_extras[] for matches. There are no
guarantees of a particular sort order. */
/* De-duplicate against the auto-discovered extras. auto_changed is raised
   before the search, so it is set even when only a hit count changes. */
auto_changed = 1;
for (i = 0; i < a_extras_cnt; i++) {
if (a_extras[i].len == len && !memcmp_nocase(a_extras[i].data, mem, len)) {
a_extras[i].hit_cnt++;
goto sort_a_extras;
}
}
/* At this point, looks like we're dealing with a new entry. So, let's
append it if we have room. Otherwise, let's randomly evict some other
entry from the bottom half of the list. */
/* Room left (fewer than MAX_AUTO_EXTRAS entries): append the token. */
if (a_extras_cnt < MAX_AUTO_EXTRAS) {
a_extras = ck_realloc_block(a_extras, (a_extras_cnt + 1) *
sizeof(struct extra_data));
a_extras[a_extras_cnt].data = ck_memdup(mem, len);
a_extras[a_extras_cnt].len = len;
a_extras_cnt++;
/* Table full: overwrite a randomly chosen entry from the second half of
   the table (presumably UR(n) yields a value in [0, n) - TODO confirm). */
} else {
i = MAX_AUTO_EXTRAS / 2 +
UR((MAX_AUTO_EXTRAS + 1) / 2);
ck_free(a_extras[i].data);
a_extras[i].data = ck_memdup(mem, len);
a_extras[i].len = len;
a_extras[i].hit_cnt = 0;
}
sort_a_extras:
/* First, sort all auto extras by use count, descending order. */
qsort(a_extras, a_extras_cnt, sizeof(struct extra_data),
compare_extras_use_d);
/* Then, sort the top USE_AUTO_EXTRAS entries by size. */
qsort(a_extras, MIN(USE_AUTO_EXTRAS, a_extras_cnt),
sizeof(struct extra_data), compare_extras_len);
}
pivot_inputs
- AFL 本身在 fuzz 过程中会涉及到很重的各种磁盘读取操作,如果输入输出在不同的路径下,会加重磁盘读取的一系列操作。这样做是为了减轻磁盘读取负担,也是为了加快 fuzz 的效率。而之所以设置成输入输出在不同的目录下,应该只是为了归档
load_extras
- 主要功能从 extras 目录中读取 extras 并按大小对其进行排序,代码比较琐碎就不具体分析了
find_timeout
- 若是 in-place resume(通过 “-i -” 选项指定),则继承上次 fuzz 的 exec_tmout
detect_file_args
- 略
setup_stdio_file
- 如果没有设置被fuzz的文件(没有用-f参数)才进入这个if语句,一般用不到
check_binary
- 检查目标程序,看找不找得到、在不在 /tmp 等