一,介绍

比赛题目很简单:构造一个程序,在 stdout 上打印出自身的 MD5,程序越短越好。按最终程序文件大小字节数排名,文件越小,排名越靠前。 只能使用 ld-linux-x86-64.so, libc.so, libdl.so, libgcc_s.so, libm.so, libstdc++.so 。 禁止了 socket, shmget, fork, execvc 等 syscall 。

汇编高手如云,本人只做到 752 字节,只拿到 27 名。 但忙活好几天,学到不少东西,也有苦劳,还是值得记录一下。

基本是纯 C 实现,没有动用汇编。

二,用过的一些思路和资源

基于这个 MD5 实现改造 https://gist.github.com/creationix/4710780

https://www.nayuki.io/page/fast-md5-hash-implementation-in-x86-assembly

参照 musl 和 rt0 ,去除对 glibc 依赖

  1. musl libc https://www.musl-libc.org/
  2. rt0 : https://github.com/lpsantil/rt0

三,没有成功的一些思路

  1. 思路,利用内核的 MD5 计算代码: grep -i md5 linux-5.6.2 只找到 2处,

    1. TCP_MD5SIG , CONFIG_TCP_MD5SIG : “TCP: MD5 Signature Option support (RFC2385)” 一个冷门特性,看起来通过 socket 才能执行。

    2. AF_ALG Linux 的 crypto api,通过 socket 形式暴露 api, 但是 socket 被禁用了。 https://www.chronox.de/libkcapi.html

  2. 构造碰撞,hardcode 直接 print 出来

    https://github.com/corkami/collisions

    https://www.rogdham.net/2017/03/12/gif-md5-hashquine.en

    https://github.com/Rogdham/gif-md5-hashquine

    http://www.win.tue.nl/hashclash/ 尝试了 fastcoll_v1 ,但是 fastcoll 构造1个字节的冲突要 2 block = 128 字节 ,则 16 字节就要 2048 字节,还不如 C 语言直接算。

  3. 通过某些 ipc 机制,绕过评判。 试了简单的 shmget, 不成功。

三. 代码

后续借鉴思路,有优化

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
#define SYS_write 1
#define SYS_exit 60

inline long syscall3(long n, long a0, long a1, long a2) {
    long ret;
    __asm__ volatile("syscall" : "=a"(ret) : "a"(n), "D"(a0), "S"(a1), "d"(a2) : "rcx", "r11", "memory");
    return (ret);
}
inline long syscall1(long n, long a0) {
    long ret;
    __asm__ volatile("syscall" : "=a"(ret) : "a"(n), "D"(a0) : "rcx", "r11", "memory");
    return (ret);
}

static int write(int f, const char* d, int l) {
    int ret = syscall3(SYS_write, f, (long)(d), l);
    return (ret);
}

void _exit(int r) { syscall1(SYS_exit, r); }

typedef unsigned uint32_t;
typedef unsigned char uint8_t;
typedef unsigned long size_t;
#define NULL 0  // In words

#define memcpy(dest, src, n)                      \
    for (size_t i = 0; i < n; ++i) {              \
        ((char*)dest)[i] = ((const char*)src)[i]; \
    }

// leftrotate function definition
#define LEFTROTATE(x, c) (((x) << (c)) | ((x) >> (32 - (c))))

static void md5_hash(uint8_t* initial_msg, const size_t initial_len, uint32_t hash[4]) {
    // Note: All variables are unsigned 32 bit and wrap modulo 2^32 when calculating

    // r specifies the per-round shift amounts
    const static uint8_t r[] = {
        7, 12, 17, 22, 5, 9, 14, 20, 4, 11, 16, 23, 6, 10, 15, 21,
    };

    // Use binary integer part of the sines of integers (in radians) as constants// Initialize variables:
    uint32_t k[64];
    for (int i = 0; i < 64; ++i) {
        double f = 1.0 + i;
        asm("fsin;\n\t"
            "fabs;\n\t"
            : "=t"(f)
            : "0"(f));
        f *= 4294967296.0;
        k[i] = (uint32_t)f;
    }

    // Pre-processing: adding a single 1 bit
    // append "1" bit to message
    /* Notice: the input bytes are considered as bits strings,
       where the first bit is the most significant bit of the byte.[37] */

    // Pre-processing: padding with zeros
    // append "0" bit until message length in bit ≡ 448 (mod 512)
    // append length mod (2 pow 64) to message

    const int new_len = ((((initial_len + 8) / 64) + 1) * 64) - 8;

    // Message (to prepare)
    uint8_t* msg = initial_msg msg[initial_len] = 128;  // write the "1" bit

    uint32_t bits_len = 8 * initial_len;  // note, we append the len
    memcpy(msg + new_len, &bits_len, 4);  // in bits at the end of the buffer

    // Process the message in successive 512-bit chunks:
    // for each 512-bit chunk of message:
    for (int offset = 0; offset < new_len; offset += 64) {
        // break chunk into sixteen 32-bit words w[j], 0 ≤ j ≤ 15
        uint32_t* w = (uint32_t*)(msg + offset);

        // Initialize hash value for this chunk:
        uint32_t a = hash[0];
        uint32_t b = hash[1];
        uint32_t c = hash[2];
        uint32_t d = hash[3];

        // Main loop:
        uint32_t i;
        for (i = 0; i < 64; i++) {
            uint32_t f, g;

            if (i < 16) {
                f = (b & c) | ((~b) & d);
                g = i;
            } else if (i < 32) {
                f = (d & b) | ((~d) & c);
                g = (5 * i + 1) % 16;
            } else if (i < 48) {
                f = b ^ c ^ d;
                g = (3 * i + 5) % 16;
            } else {
                f = c ^ (b | (~d));
                g = (7 * i) % 16;
            }

            uint32_t temp = d;
            d = c;
            c = b;
            b = b + LEFTROTATE((a + f + k[i] + w[g]), r[i / 4 / 4 * 4 + i % 4]);
            a = temp;
        }

        // Add this chunk's hash to result so far:

        hash[0] += a;
        hash[1] += b;
        hash[2] += c;
        hash[3] += d;
    }
}

static char xdigits(char c) {
    if ((c >= 0) && (c <= 9)) {
        return '0' + c;
    }
    return 'a' + (c - 10);
}

void _start(void) __attribute__((noreturn));

void _start(void) {
    uint32_t hash[4] = {0x67452301, 0xefcdab89, 0x98badcfe, 0x10325476};

    char* addr = (char*)0x400000;
    const size_t len = 688;
    md5_hash(addr, len, hash);

    unsigned char* md = (unsigned char*)&hash[0];
    char buff[32];
    for (int i = 0; i < 32; ++i) {
        unsigned x = md[(i >> 1)];
        x >>= (i & 1 ? 0 : 4);
        buff[i] = xdigits(x & 15);
    }
    write(1, &buff[0], sizeof(buff));
    _exit(0);
}
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
CFLAGS="-static -Os -Wl,--omagic -D__RT0_WITH_FASTER_SYSCALL__ -ffunction-sections -Wl,--gc-sections -nostartfiles -nodefaultlibs -nostdlib -nostdinc -Wl,--build-id=none -fomit-frame-pointer -fno-stack-protector -fno-unwind-tables -fno-asynchronous-unwind-tables -fno-unroll-loops -fmerge-all-constants -fno-ident -mfpmath=sse -mfancy-math-387  -finline-functions-called-once -I ./ "

gcc   $CFLAGS  print_my_md5.c  -S -fverbose-asm
gcc   $CFLAGS  print_my_md5.s  -o print_my_md5 

strip -R .comment print_my_md5
strip -R .bss print_my_md5
ls -l ./print_my_md5
./ELFkickers/sstrip/sstrip print_my_md5
./print_my_md5
echo
md5sum ./print_my_md5
ls -l ./print_my_md5