2020-04-07 ycdxsb CTF / pwn / pwnable.kr / Toddler'sBottle6 分钟读完 (大约 963 个字) 0次访问

pwnable.kr —— memcpy

question#

Are you tired of hacking?, take some rest here.
Just help me out with my small experiment regarding memcpy performance. 
after that, flag is yours.

http://pwnable.kr/bin/memcpy.c

ssh memcpy@pwnable.kr -p2222 (pw:guest)

memcpy.c#

// compiled with : gcc -o memcpy memcpy.c -m32 -lm
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <signal.h>
#include <unistd.h>
#include <sys/mman.h>
#include <math.h>

/*
 * Read the CPU time-stamp counter (x86 / x86-64 only).
 *
 * RDTSC leaves the low 32 bits of the counter in EAX and the high 32 bits
 * in EDX. Both halves are captured through explicit output operands and
 * combined here, so the result no longer depends on the asm statement
 * happening to leave the registers where the ABI expects a 64-bit return
 * value.
 *
 * Returns the 64-bit counter value.
 */
unsigned long long rdtsc(){
        unsigned int lo, hi;
        __asm__ __volatile__ ("rdtsc" : "=a"(lo), "=d"(hi));
        return ((unsigned long long)hi << 32) | lo;
}

/*
 * Byte-by-byte copy of len bytes from src to dest, in ascending address
 * order.
 *
 * dest: destination buffer, at least len bytes
 * src:  source buffer, at least len bytes
 * len:  number of bytes to copy (0 copies nothing)
 *
 * Returns dest.
 */
char* slow_memcpy(char* dest, const char* src, size_t len){
	size_t i;	/* same type as len: no signed/unsigned comparison, no int overflow */
	for (i=0; i<len; i++) {
		dest[i] = src[i];
	}
	return dest;
}

/*
 * Copy len bytes from src to dest, moving whole 64-byte blocks through
 * xmm0-xmm3 and handing the remaining (len % 64) bytes to slow_memcpy().
 *
 * dest: destination buffer, at least len bytes
 * src:  source buffer, at least len bytes
 * len:  number of bytes to copy
 *
 * Precondition when len >= 64: the block loop uses movdqa loads and movntps
 * stores with memory operands, and movntps raises a general-protection
 * exception if its memory operand is not 16-byte aligned.
 * NOTE(review): movdqa has the same alignment requirement per the Intel
 * SDM, so src must be 16-byte aligned as well - confirm against the manual.
 * The instructions are deliberately left as they are; callers must pass
 * aligned buffers.
 *
 * Returns the start of the destination buffer, matching slow_memcpy().
 */
char* fast_memcpy(char* dest, const char* src, size_t len){
	char* const start = dest;	/* dest is advanced below; remember what to return */
	size_t blocks = len / 64;
	size_t tail = len & (64-1);

	// 64-byte block fast copy
	while(blocks-- > 0){
		__asm__ __volatile__ (
		"movdqa (%0), %%xmm0\n"
		"movdqa 16(%0), %%xmm1\n"
		"movdqa 32(%0), %%xmm2\n"
		"movdqa 48(%0), %%xmm3\n"
		"movntps %%xmm0, (%1)\n"
		"movntps %%xmm1, 16(%1)\n"
		"movntps %%xmm2, 32(%1)\n"
		"movntps %%xmm3, 48(%1)\n"
		::"r"(src),"r"(dest)
		/* the asm overwrites xmm0-xmm3, so the compiler has to be told */
		:"memory","xmm0","xmm1","xmm2","xmm3");
		dest += 64;
		src += 64;
	}

	// byte-to-byte slow copy
	if(tail) slow_memcpy(dest, src, tail);
	return start;
}

/*
 * Experiment driver.
 *
 * Prints the banner, asks for ten copy sizes (one per power-of-two range
 * from 8~16 up to 4096~8192), then for each size times slow_memcpy() and
 * fast_memcpy() with rdtsc() and prints the elapsed cycle counts, followed
 * by the flag line.
 *
 * Returns 0 on success and 1 if an allocation fails. As before, the process
 * calls exit(0) when a size is outside its range; input that cannot be
 * parsed as a number is now rejected the same way.
 */
int main(void){

	setvbuf(stdout, 0, _IONBF, 0);
	setvbuf(stdin, 0, _IOLBF, 0);

	printf("Hey, I have a boring assignment for CS class.. :(\n");
	printf("The assignment is simple.\n");

	printf("-----------------------------------------------------\n");
	printf("- What is the best implementation of memcpy?        -\n");
	printf("- 1. implement your own slow/fast version of memcpy -\n");
	printf("- 2. compare them with various size of data         -\n");
	printf("- 3. conclude your experiment and submit report     -\n");
	printf("-----------------------------------------------------\n");

	printf("This time, just help me out with my experiment and get flag\n");
	printf("No fancy hacking, I promise :D\n");

	unsigned long long t1, t2;
	int e;
	char* src;
	char* dest;
	unsigned int low, high;
	unsigned int size;
	// allocate memory
	const int prot = PROT_READ|PROT_WRITE|PROT_EXEC;
	char* cache1 = mmap(0, 0x4000, prot, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
	char* cache2 = mmap(0, 0x4000, prot, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
	src = mmap(0, 0x2000, prot, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
	if(cache1 == MAP_FAILED || cache2 == MAP_FAILED || src == MAP_FAILED){
		perror("mmap");
		return 1;
	}

	size_t sizes[10];
	int i=0;

	// setup experiment parameters
	for(e=4; e<14; e++){	// 2^13 = 8K
		// exact integer powers of two; no floating-point round trip
		low = 1u << (e-1);
		high = 1u << e;
		printf("specify the memcpy amount between %u ~ %u : ", low, high);
		// a failed conversion would leave size unset, so treat it as invalid input
		if( scanf("%u", &size) != 1 || size < low || size > high ){
			printf("don't mess with the experiment.\n");
			exit(0);
		}
		sizes[i++] = size;
	}

	sleep(1);
	printf("ok, lets run the experiment with your configuration\n");
	sleep(1);

	// run experiment
	for(i=0; i<10; i++){
		size = sizes[i];
		printf("experiment %d : memcpy with buffer size %u\n", i+1, size);
		// NOTE(review): dest is never freed. Releasing it here would change
		// where later allocations land, so it is left as is - confirm intent.
		dest = malloc( size );
		if(dest == NULL){
			perror("malloc");
			return 1;
		}

		memcpy(cache1, cache2, 0x4000);		// to eliminate cache effect
		t1 = rdtsc();
		slow_memcpy(dest, src, size);		// byte-to-byte memcpy
		t2 = rdtsc();
		printf("ellapsed CPU cycles for slow_memcpy : %llu\n", t2-t1);

		memcpy(cache1, cache2, 0x4000);		// to eliminate cache effect
		t1 = rdtsc();
		fast_memcpy(dest, src, size);		// block-to-block memcpy
		t2 = rdtsc();
		printf("ellapsed CPU cycles for fast_memcpy : %llu\n", t2-t1);
		printf("\n");
	}

	printf("thanks for helping my experiment!\n");
	printf("flag : ----- erased in this source code -----\n");
	return 0;
}

可以看到，程序依次调用slow_memcpy和fast_memcpy两个函数，并比较它们各自的耗时

analyse#

随便输入范围内数字会出现segmentation fault，问题出在汇编指令movntps。

查阅资料可以看到，当使用这个指令时，内存必须按照16字节对齐

The destination operand is a 128-bit or 256-bit memory location. The memory operand must be aligned on a 16-byte (128-bit version) or 32-byte (VEX.256 encoded version) boundary otherwise a general-protection exception (#GP) will be generated.

而用户使用malloc申请的内存（32位glibc下）却只保证8字节对齐。假如用户申请内存大小x，那么分配后的块大小为x+4向上取整到8的倍数，即8*ceil((x+4)/8)（最小为16字节）。只有当每个块的大小都是16的倍数时，下一次malloc返回的地址才仍然保持16字节对齐。

因此对于给定大小low和high限定，我们可以计算得到合适的内存大小x

def get_input(low,high):
    """Return the first size i in [low, high) with (i+4) % 16 == 0 or >= 9.

    According to the analysis in this write-up, a request of such a size
    yields a malloc chunk whose size is a multiple of 16 bytes, so the
    following allocation stays 16-byte aligned.

    Returns None when no value in the range satisfies the condition.
    """
    # range() instead of xrange() so the helper runs on Python 2 and Python 3.
    for i in range(low,high):
        if ((i+4)%16>=9) or ((i+4)%16==0):
            return i

get flag#

from pwn import *
def get_input(low,high):
    """Return the first size i in [low, high) with (i+4) % 16 == 0 or >= 9.

    According to the analysis in this write-up, a request of such a size
    yields a malloc chunk whose size is a multiple of 16 bytes, so the
    following allocation stays 16-byte aligned.

    Returns None when no value in the range satisfies the condition.
    """
    # range() instead of xrange() so the helper runs on Python 2 and Python 3.
    for i in range(low,high):
        if ((i+4)%16>=9) or ((i+4)%16==0):
            return i

#context.log_level = 'debug'
p = remote("pwnable.kr",9022)
#p = process('./memcpy')
# Answer the ten size prompts, then hand the session over to the user.
for _ in range(0,10):
    # Wait for the complete prompt instead of sleeping and hoping a single
    # recv() returns all of it. The prompt reads
    # "specify the memcpy amount between <low> ~ <high> : ".
    p.recvuntil("amount between ")
    bounds = p.recvuntil(" : ", drop=True)
    if not isinstance(bounds, str):
        # Python 3 pwntools returns bytes
        bounds = bounds.decode()
    low, high = [int(tok) for tok in bounds.split("~")]
    output = get_input(low,high)
    print("info:%d %d , input: %d"%(low,high,output))
    p.sendline(str(output))
p.interactive()

 ↵ 1  python CTF/Challenges/pwnable.kr/part1/memcpy/memcpy.py
[+] Opening connection to pwnable.kr on port 9022: Done
info:8 16 , input: 8
info:16 32 , input: 21
info:32 64 , input: 37
info:64 128 , input: 69
info:128 256 , input: 133
info:256 512 , input: 261
info:512 1024 , input: 517
info:1024 2048 , input: 1029
info:2048 4096 , input: 2053
info:4096 8192 , input: 4101
[*] Switching to interactive mode
ok, lets run the experiment with your configuration
experiment 1 : memcpy with buffer size 8
ellapsed CPU cycles for slow_memcpy : 2036
ellapsed CPU cycles for fast_memcpy : 172

experiment 2 : memcpy with buffer size 21
ellapsed CPU cycles for slow_memcpy : 220
ellapsed CPU cycles for fast_memcpy : 252

experiment 3 : memcpy with buffer size 37
ellapsed CPU cycles for slow_memcpy : 336
ellapsed CPU cycles for fast_memcpy : 388

experiment 4 : memcpy with buffer size 69
ellapsed CPU cycles for slow_memcpy : 508
ellapsed CPU cycles for fast_memcpy : 182

experiment 5 : memcpy with buffer size 133
ellapsed CPU cycles for slow_memcpy : 1042
ellapsed CPU cycles for fast_memcpy : 152

experiment 6 : memcpy with buffer size 261
ellapsed CPU cycles for slow_memcpy : 1854
ellapsed CPU cycles for fast_memcpy : 204

experiment 7 : memcpy with buffer size 517
ellapsed CPU cycles for slow_memcpy : 3658
ellapsed CPU cycles for fast_memcpy : 186

experiment 8 : memcpy with buffer size 1029
ellapsed CPU cycles for slow_memcpy : 7146
ellapsed CPU cycles for fast_memcpy : 374

experiment 9 : memcpy with buffer size 2053
ellapsed CPU cycles for slow_memcpy : 14326
ellapsed CPU cycles for fast_memcpy : 680

experiment 10 : memcpy with buffer size 4101
ellapsed CPU cycles for slow_memcpy : 30404
ellapsed CPU cycles for fast_memcpy : 1284

thanks for helping my experiment!
flag : 1_w4nn4_br34K_th3_m3m0ry_4lignm3nt
[*] Got EOF while reading in interactive

flag:1_w4nn4_br34K_th3_m3m0ry_4lignm3nt

pwnable.kr —— memcpy

question#

memcpy.c#

analyse#

get flag#

喜欢这篇文章？打赏一下作者吧

评论

链接

归档

订阅更新

分类

最新文章

标签