汉明距离-[De1CTF2019]xorz - CRYPTO

# 汉明距离

汉明距离以美国数学家理查德・卫斯里・汉明的名字命名，表示两个相同长度的字符串在相同位置上不同字符的个数。
用 d (x,y) 来表示 x 和 y 两个字符串的汉明距离。汉明距离可以用来计算两个文本之间的相似度，根据不同字符的个数来判断两个文本是否相似。

# 例题

[De1CTF2019]xorz

	# Challenge source as distributed (Python 2: `print` is a statement here).
	from itertools import *
	from data import flag,plain

	# NOTE: str.strip(chars) removes any of the given characters from both
	# ends rather than a literal prefix/suffix, so key characters that appear
	# in "de1ctf{" or "}" at either end of the flag body are stripped as well.
	key=flag.strip("de1ctf{").strip("}")
	assert(len(key)<38)
	salt="WeAreDe1taTeam"
	# Both the key and the salt repeat for as long as the plaintext lasts.
	ki=cycle(key)
	si=cycle(salt)
	# Each plaintext character is XORed with one key character and one salt
	# character, then written as hex zero-padded to at least two digits.
	cipher = ''.join([hex(ord(p) ^ ord(next(ki)) ^ ord(next(si)))[2:].zfill(2) for p in plain])
	print cipher
	# output:
	# 49380d773440222d1b421b3060380c3f403c3844791b202651306721135b6229294a3c3222357e766b2f15561b35305e3c3b670e49382c295c6c170553577d3a2b791470406318315d753f03637f2b614a4f2e1c4f21027e227a4122757b446037786a7b0e37635024246d60136f7802543e4d36265c3e035a725c6322700d626b345d1d6464283a016f35714d434124281b607d315f66212d671428026a4f4f79657e34153f3467097e4e135f187a21767f02125b375563517a3742597b6c394e78742c4a725069606576777c314429264f6e330d7530453f22537f5e3034560d22146831456b1b72725f30676d0d5c71617d48753e26667e2f7a334c731c22630a242c7140457a42324629064441036c7e646208630e745531436b7c51743a36674c4f352a5575407b767a5c747176016c0676386e403a2b42356a727a04662b4446375f36265f3f124b724c6e346544706277641025063420016629225b43432428036f29341a2338627c47650b264c477c653a67043e6766152a485c7f33617264780656537e5468143f305f4537722352303c3d4379043d69797e6f3922527b24536e310d653d4c33696c635474637d0326516f745e610d773340306621105a7361654e3e392970687c2e335f3015677d4b3a724a4659767c2f5b7c16055a126820306c14315d6b59224a27311f747f336f4d5974321a22507b22705a226c6d446a37375761423a2b5c29247163046d7e47032244377508300751727126326f117f7a38670c2b23203d4f27046a5c5e1532601126292f577776606f0c6d0126474b2a73737a41316362146e581d7c1228717664091c

salt 和 key 是循环的

salt 是已知的，先把它异或掉。

这样就只剩下 key 和 plain 的异或了。这两个都是未知的，但 key 是循环使用的，所以可以先用汉明距离猜出 key 的长度，再逐字节爆破 key。

四字节为单位爆破

	import string
	from binascii import unhexlify, hexlify
	from itertools import *

	def bxor(a, b):
	    """XOR two byte sequences element-wise.

	    Args:
	        a: First sequence of ints in range(256) (bytes, bytearray, list).
	        b: Second sequence of the same kind; lengths may differ.

	    Returns:
	        A bytes object as long as the shorter input; the surplus tail of
	        the longer input is ignored.
	    """
	    # zip() stops at the shorter input, which gives the truncation for free.
	    return bytes(x ^ y for x, y in zip(a, b))

	def hamming_distance(b1, b2):
	    """Count the bits that differ between two byte sequences.

	    Args:
	        b1: First sequence of ints in range(256) (bytes, bytearray, list).
	        b2: Second sequence of the same kind.

	    Returns:
	        The number of 1-bits in the element-wise XOR of the inputs. When
	        the lengths differ only the overlapping prefix is compared.
	    """
	    return sum(bin(x ^ y).count("1") for x, y in zip(b1, b2))

	def break_single_key_xor(text):
	    """Guess the single-byte XOR key of *text* by locating a space.

	    XORing a space (0x20) with an ASCII letter yields another ASCII
	    letter, and XORing two equal bytes yields 0. The position whose byte
	    produces the most "letter or zero" results against every other
	    position is therefore taken to hold an encrypted space.

	    Args:
	        text: Indexable sequence of ints in range(256) (bytes or a list),
	            all encrypted with the same key byte.

	    Returns:
	        The guessed key as a one-character string.

	    Raises:
	        IndexError: If *text* is empty.
	    """
	    letters = frozenset(string.ascii_letters.encode('ascii'))
	    best_index = 0
	    best_score = 0
	    for i, candidate in enumerate(text):
	        score = 0
	        for j, other in enumerate(text):
	            if i == j:
	                continue
	            mixed = candidate ^ other
	            if mixed == 0 or mixed in letters:
	                score += 1
	        # Strict '>' keeps the earliest position when scores tie.
	        if score > best_score:
	            best_score = score
	            best_index = i
	    # If that position is a space, ciphertext ^ 0x20 is the key byte.
	    return chr(text[best_index] ^ 0x20)

	# Strip the known repeating salt so that only plain XOR key remains.
	salt = "WeAreDe1taTeam"
	si = cycle(salt.encode("ascii"))
	cipher_bytes = unhexlify(b'49380d773440222d1b421b3060380c3f403c3844791b202651306721135b6229294a3c3222357e766b2f15561b35305e3c3b670e49382c295c6c170553577d3a2b791470406318315d753f03637f2b614a4f2e1c4f21027e227a4122757b446037786a7b0e37635024246d60136f7802543e4d36265c3e035a725c6322700d626b345d1d6464283a016f35714d434124281b607d315f66212d671428026a4f4f79657e34153f3467097e4e135f187a21767f02125b375563517a3742597b6c394e78742c4a725069606576777c314429264f6e330d7530453f22537f5e3034560d22146831456b1b72725f30676d0d5c71617d48753e26667e2f7a334c731c22630a242c7140457a42324629064441036c7e646208630e745531436b7c51743a36674c4f352a5575407b767a5c747176016c0676386e403a2b42356a727a04662b4446375f36265f3f124b724c6e346544706277641025063420016629225b43432428036f29341a2338627c47650b264c477c653a67043e6766152a485c7f33617264780656537e5468143f305f4537722352303c3d4379043d69797e6f3922527b24536e310d653d4c33696c635474637d0326516f745e610d773340306621105a7361654e3e392970687c2e335f3015677d4b3a724a4659767c2f5b7c16055a126820306c14315d6b59224a27311f747f336f4d5974321a22507b22705a226c6d446a37375761423a2b5c29247163046d7e47032244377508300751727126326f117f7a38670c2b23203d4f27046a5c5e1532601126292f577776606f0c6d0126474b2a73737a41316362146e581d7c1228717664091c')
	# XOR the raw bytes directly; decoding the ciphertext as text first would
	# fail on any byte that is not valid UTF-8.
	b = bytes(c ^ s for c, s in zip(cipher_bytes, si))
	# Hex form of the de-salted ciphertext, printed for inspection.
	plain = b.hex()
	print(plain)
	# Rank candidate key lengths: for the right length, neighbouring blocks
	# were XORed with the same key bytes, so their Hamming distance is lowest.
	normalized_distances = []

	for KEYSIZE in range(2, 40):
	    # Take the first six KEYSIZE-byte blocks and average the Hamming
	    # distance over the five adjacent pairs.
	    blocks = [b[i * KEYSIZE:(i + 1) * KEYSIZE] for i in range(6)]
	    total = sum(hamming_distance(x, y) for x, y in zip(blocks, blocks[1:]))
	    # Divide by the number of compared bytes so sizes are comparable.
	    normalized_distance = float(total) / (KEYSIZE * 5)
	    normalized_distances.append((KEYSIZE, normalized_distance))
	# Smallest normalized distance first.
	normalized_distances = sorted(normalized_distances, key=lambda x: x[1])

	# Try the five most likely key lengths and print each candidate result.
	for KEYSIZE, _ in normalized_distances[:5]:
	    # Column i holds every byte that was XORed with key byte i.
	    columns = [b[i::KEYSIZE] for i in range(KEYSIZE)]
	    keys = ''.join(break_single_key_xor(column) for column in columns)
	    # latin-1 maps each guessed character to exactly one byte, so the key
	    # stays aligned with the ciphertext even for values >= 0x80.
	    key = keys.encode("latin-1")
	    plaintext = bytes(c ^ k for c, k in zip(b, cycle(key)))
	    print("keysize:", KEYSIZE)
	    print("key is:", keys, "\n")
	    try:
	        s = plaintext.decode()
	    except UnicodeDecodeError:
	        # A wrong key guess usually yields bytes that are not valid text.
	        print("(plaintext is not valid UTF-8, skipping)")
	        continue
	    print(s)

从大佬那里拿来的代码

刷题

# 汉明距离

# 例题

操作系统

重合指数