To1in 的BUU刷题记录——5(梅森旋转专题)
10.[GKCTF 2021]Random
import random
from hashlib import md5
def get_mask():
file = open("random.txt","w")
for i in range(104):
file.write(str(random.getrandbits(32))+"\n")
file.write(str(random.getrandbits(64))+"\n")
file.write(str(random.getrandbits(96))+"\n")
file.close()
get_mask()
flag = md5(str(random.getrandbits(32)).encode()).hexdigest()
print(flag)
一道随机数题,查阅资料可以知道,python中使用的随机数生成器是MT19937。这里我们稍微详细一些,研究一下这个伪随机数生成器。
梅森旋转算法(Mersenne twister)
梅森旋转算法(Mersenne twister),可以快速产生高质量的伪随机数,修正了古典随机数发生算法的很多缺陷。常见的两种为基于32位的 MT19937和基于64位的 MT19937-64。
由于梅森旋转算法是利用线性反馈移位寄存器(LFSR)产生随机数的,对于LFRS有结论:一个 kk 位的移位寄存器,选取合适的特征多项式(即加1为本原多项式)时,取得最大周期
整个算法主要分为三个阶段:
- 第一阶段:获得基础的梅森旋转链;
- 第二阶段:对于旋转链进行旋转算法;
- 第三阶段:对于旋转算法所得的结果进行处理;
32位的梅森旋转算法能够产生周期为
:参与梅森旋转的随机数个数; : 之间的整数; : 之间的整数; : 的常矩阵; : 的最高 比特组成的数(低位补零); : 的最低 比特组成的数(高位补零)。
首先算法会根基随机数种子初始化
然后从
在MT中,A的定义是这样的
所以
接下来我们需要把每次旋转的结果提取出来,提取方法如下
此处,
最后我们上一下完整的代码。
def _int32(x):
return int(0xFFFFFFFF & x)
class MT19937:
# 根据seed初始化624的state
def __init__(self, seed):
self.mt = [0] * 624
self.mt[0] = seed
self.mti = 0
for i in range(1, 624):
self.mt[i] = _int32(1812433253 * (self.mt[i - 1] ^ self.mt[i - 1] >> 30) + i)
# 提取伪随机数
def extract_number(self):
if self.mti == 0:
self.twist()
y = self.mt[self.mti]
y = y ^ y >> 11
y = y ^ y << 7 & 2636928640
y = y ^ y << 15 & 4022730752
y = y ^ y >> 18
self.mti = (self.mti + 1) % 624
return _int32(y)
# 对状态进行旋转
def twist(self):
for i in range(0, 624):
y = _int32((self.mt[i] & 0x80000000) + (self.mt[(i + 1) % 624] & 0x7fffffff))
self.mt[i] = (y >> 1) ^ self.mt[(i + 397) % 624]
if y % 2 != 0:
self.mt[i] = self.mt[i] ^ 0x9908b0df
上面我们简单介绍了梅森旋转算法。而梅森旋转算法实际上仍然是一个伪随机数生成算法,而不是密码学上安全的随机数。从梅森旋转算法的结构上说,它的提取算法完全基于异或运算,而我们知道异或是一个可逆的运算,所以提取的过程可逆。这就意味着我们在获得输出后是可以逆推出内部寄存器的状态的,如果我们获得了连续的寄存器状态,那么我们就可以预测接下来的随机数。
我们先以向右移位后异或为例,先观察一个例子
value: 1101 0010
shifted: 0001 1010 # 010 (>> 3)
result: 1100 1000
可以看到result中最高的3位没有发生变化,因此我们把result的最高三位取出,让result和1110 000进行&运算。然后我们把运算后的结果右移三位,再和result异或,我们就可以得到value的最高6位,如此循环,就可以得到value的所有位。对于左移异或也有类似的结论。于是我们就可以写出求逆代码。
class TemperInverser:
__b = 0x9d2c5680
__c = 0xefc60000
__kMaxBits = 0xffffffff
def __inverse_right_shift_xor(self, value, shift):
i, result = 0, 0
while i * shift < 32:
part_mask = ((self.__kMaxBits << (32 - shift)) & self.__kMaxBits) >> (i * shift)
part = value & part_mask
value ^= part >> shift
result |= part
i += 1
return result
def __inverse_left_shift_xor(self, value, shift, mask):
i, result = 0, 0
while i * shift < 32:
part_mask = (self.__kMaxBits >> (32 - shift)) << (i * shift)
part = value & part_mask
value ^= (part << shift) & mask
result |= part
i += 1
return result
def __inverse_temper(self, tempered):
value = tempered
value = self.__inverse_right_shift_xor(value, 18)
value = self.__inverse_left_shift_xor(value, 15, self.__c)
value = self.__inverse_left_shift_xor(value, 7, self.__b)
value = self.__inverse_right_shift_xor(value, 11)
return value
def __call__(self, tempered):
return self.__inverse_temper(tempered)
所以,如果我们可以获得完整的624个32bit输出,就可以通过求逆得到624个寄存器状态,然后就可以预测下一个输出了。这里再放一个用矩阵求的算法,具体就不分析了,顺手贴一下这次blog的reference。
from sage.all import *
from random import Random
def buildT():
rng = Random()
T = matrix(GF(2),32,32)
for i in range(32):
s = [0]*624
# 构造特殊的state
s[0] = 1<<(31-i)
rng.setstate((3,tuple(s+[0]),None))
tmp = rng.getrandbits(32)
# 获取T矩阵的每一行
row = vector(GF(2),[int(x) for x in bin(tmp)[2:].zfill(32)])
T[i] = row
return T
def reverse(T,leak):
Z = vector(GF(2),[int(x) for x in bin(leak)[2:].zfill(32)])
X = T.solve_left(Z)
state = int(''.join([str(i) for i in X]),2)
return state
def test():
rng = Random()
# 泄露信息
leak = [rng.getrandbits(32) for i in range(32)]
originState = [i for i in rng.getstate()[1][:32]]
# 构造矩阵T
T = buildT()
recoverState = [reverse(T,i) for i in leak]
print(recoverState==originState)
test()
#reference https://www.anquanke.com/post/id/205861#h2-7
然后我们再说说求之前的随机数,这时我们就需要获得前一组的state状态。
def twist(self):
for i in range(0, 624):
y = _int32((self.mt[i] & 0x80000000) + (self.mt[(i + 1) % 624] & 0x7fffffff))
self.mt[i] = (y >> 1) ^ self.mt[(i + 397) % 624]
if y % 2 != 0:
self.mt[i] = self.mt[i] ^ 0x9908b0df
通过对代码的分析我们可以知道,新生成的state[i]只和原来的state[i], 以及state[i+1], state[i+397]有关。我们可以自己试一组数据(还是reference里提供的):
11100110110101000100101111000001 // state[i]
10101110111101011001001001011111 // state[i + 1]
11101010010001001010000001001001 // state[i + 397]
1. 10101110111101011001001001011111 // y = state[i] & 0x80000000 | state[i + 1] & 0x7fffffff
2. 01010111011110101100100100101111 // next = y >>> 1
3. 11001110011100100111100111110000 // next ^= 0x9908b0df
# 0x9908b0df => 10011001000010001011000011011111
4. 00100100001101101101100110111001 // next ^= state[i + 397]
其中1 2 4一定会执行(因为异或的顺序不影响结果,所以把异或放在最后了),而第三步是根据y的奇偶性判读是否执行。但是因为4是把state[i]^state[i+397],所以我们可以把新的state[i]和state[i+397]进行异或来进行一些判断。注意到0x9908b0df的首位是1,而2执行后y的最高位一定是0,所以如果执行了3,那么结果的首位会变成0。所以我们可以根据逆异或后的结果判断是否执行了3。进而我们可以推出2后的y,这时y包含了state[i]的第一位和state[i+1]的2-31位,而我们可以根据是否执行了3,获得没有位移的y的最后1位即state[i+1]的第32位,要获得state[i]剩下的31位信息,只需要把现在的state[i-1]和state[i+396]进行异或,然后我们就可以根据上面所说,得到state[i]的31位信息。当i=0时,我们已经知道了其他所有的state[i],这时,我们只要利用state[-1]也就是state[623]获得state[0]的后31位。
def backtrace(cur):
high = 0x80000000
low = 0x7fffffff
mask = 0x9908b0df
state = cur
for i in range(623,-1,-1):
tmp = state[i]^state[(i+397)%624]
# recover Y,tmp = Y
if tmp & high == high:
tmp ^= mask
tmp <<= 1
tmp |= 1
else:
tmp <<=1
# recover highest bit
res = tmp&high
# recover other 31 bits,when i =0,it just use the method again it so beautiful!!!!
tmp = state[i-1]^state[(i+396)%624]
# recover Y,tmp = Y
if tmp & high == high:
tmp ^= mask
tmp <<= 1
tmp |= 1
else:
tmp <<=1
res |= (tmp)&low
state[i] = res
return state
最后,我们来说说逆init这个事情,这里是根据第一次的state逆seed
def _int32(x):
return int(0xFFFFFFFF & x)
def init(seed):
mt = [0] * 624
mt[0] = seed
for i in range(1, 624):
mt[i] = _int32(1812433253 * (mt[i - 1] ^ mt[i - 1] >> 30) + i)
return mt
看到关键的第8行,通过前面的分析我们知道,mt[i-1]^mt[i-1]>>30是可逆的,而_int32实际上是取32位,也可以当作
from gmpy2 import invert
def _int32(x):
return int(0xFFFFFFFF & x)
def init(seed):
mt = [0] * 624
mt[0] = seed
for i in range(1, 624):
mt[i] = _int32(1812433253 * (mt[i - 1] ^ mt[i - 1] >> 30) + i)
return mt
seed = 2080737669
def invert_right(res,shift):
tmp = res
for i in range(32//shift):
res = tmp^res>>shift
return _int32(res)
def recover(last):
n = 1<<32
inv = invert(1812433253,n)
for i in range(623,0,-1):
last = ((last-i)*inv)%n
last = invert_right(last,30)
return last
state = init(seed)
print(recover(state[-1]) == seed)
最后上一下总的tool(如果使用有问题记得告诉我,我还没全部测过)
from gmpy2 import invert
def _int32(x):
return int(0xFFFFFFFF & x)
class anti_MT19937:
def __init__(self, seed=0):
self.mt = [0] * 624
self.mt[0] = seed
self.mti = 0
for i in range(1, 624):
self.mt[i] = _int32(1812433253 * (self.mt[i - 1] ^ self.mt[i - 1] >> 30) + i)
def getstate(self,op=False):
if self.mti == 0 and op==False:
self.twist()
y = self.mt[self.mti]
y = y ^ y >> 11
y = y ^ y << 7 & 2636928640
y = y ^ y << 15 & 4022730752
y = y ^ y >> 18
self.mti = (self.mti + 1) % 624
return _int32(y)
def twist(self):
for i in range(0, 624):
y = _int32((self.mt[i] & 0x80000000) + (self.mt[(i + 1) % 624] & 0x7fffffff))
self.mt[i] = (y >> 1) ^ self.mt[(i + 397) % 624]
if y % 2 != 0:
self.mt[i] = self.mt[i] ^ 0x9908b0df
def inverse_right(self,res, shift, mask=0xffffffff, bits=32):
tmp = res
for i in range(bits // shift):
tmp = res ^ tmp >> shift & mask
return tmp
def inverse_left(self,res, shift, mask=0xffffffff, bits=32):
tmp = res
for i in range(bits // shift):
tmp = res ^ tmp << shift & mask
return tmp
def extract_number(self,y):
y = y ^ y >> 11
y = y ^ y << 7 & 2636928640
y = y ^ y << 15 & 4022730752
y = y ^ y >> 18
return y&0xffffffff
def recover(self,y):
y = self.inverse_right(y,18)
y = self.inverse_left(y,15,4022730752)
y = self.inverse_left(y,7,2636928640)
y = self.inverse_right(y,11)
return y&0xffffffff
def setstate(self,s):
if(len(s)!=624):
raise ValueError("The length of prediction must be 624!")
for i in range(624):
self.mt[i]=self.recover(s[i])
#self.mt=s
self.mti=0
def predict(self,s):
self.setstate(s)
self.twist()
return self.getstate(True)
def invtwist(self):
high = 0x80000000
low = 0x7fffffff
mask = 0x9908b0df
for i in range(623,-1,-1):
tmp = self.mt[i]^self.mt[(i+397)%624]
if tmp & high == high:
tmp ^= mask
tmp <<= 1
tmp |= 1
else:
tmp <<=1
res = tmp&high
tmp = self.mt[i-1]^self.mt[(i+396)%624]
if tmp & high == high:
tmp ^= mask
tmp <<= 1
tmp |= 1
else:
tmp <<=1
res |= (tmp)&low
self.mt[i] = res
def recover_seed(self,last):
n = 1 << 32
inv = invert(1812433253, n)
for i in range(623, 0, -1):
last = ((last - i) * inv) % n
last = self.inverse_right(last, 30)
return last
#reference https://www.anquanke.com/post/id/205861#h2-7
有了上面的知识,我们再来看这道题。题目给了我们104组输出,每组有32,64,96bit的三次输出,所以每组相当于输出了6次32bit数据,总共就相当于给了624组32bit输出,我们就可以根据这624次输出,预测下一次输出,从而得到flag。
from anti_mt19937 import *
from hashlib import md5
anti = anti_MT19937()
f = open('random.txt', 'r')
s = []
for line in f:
a = eval(line)
while a != 0:
s.append(a & 0xffffffff)
a = a >> 32
flag = md5(str(anti.predict(s)).encode()).hexdigest()
print(flag)
11.[NPUCTF2020]Mersenne twister
from hashlib import *
from itertools import *
from binascii import hexlify, unhexlify
from flag import flag, seed
assert len(flag) == 26
assert flag[:7] == 'npuctf{'
assert flag[-1] == '}'
XOR = lambda s1, s2: bytes([x1 ^ x2 for x1, x2 in zip(s1, s2)])
class mt73991:
def __init__(self, seed):
self.state = [seed] + [0] * 232
self.flag = 0
self.srand()
self.generate()
def srand(self):
for i in range(232):
self.state[i + 1] = 1812433253 * (self.state[i] ^ (self.state[i] >> 27)) - i
self.state[i + 1] &= 0xffffffff
def generate(self):
for i in range(233):
y = (self.state[i] & 0x80000000) | (self.state[(i + 1) % 233] & 0x7fffffff)
temp = y >> 1
temp ^= self.state[(i + 130) % 233]
if y & 1:
temp ^= 0x9908f23f
self.state[i] = temp
def getramdanbits(self):
if self.flag == 233:
self.generate()
self.flag = 0
bits = self.Next(self.state[self.flag]).to_bytes(4, 'big')
self.flag += 1
return bits
def Next(self, tmp):
tmp ^= (tmp >> 11)
tmp ^= (tmp << 7) & 0x9ddf4680
tmp ^= (tmp << 15) & 0xefc65400
tmp ^= (tmp >> 18) & 0x34adf670
return tmp
def encrypt(key, plain):
tmp = md5(plain).digest()
return hexlify(XOR(tmp, key))
if __name__ == "__main__":
flag = flag.encode()
random = mt73991(seed)
f = open('./cipher.txt', 'wb')
for i in flag:
key = b''.join([random.getramdanbits() for _ in range(4)])
cipher = encrypt(key, chr(i).encode())
f.write(cipher)
可以看到,题目是出题人自己魔改的MT19937,内部的寄存器只有233个状态。每次获取key,会得到输出的4个随机数,每次加密一个字符,先把字符MD5,然后和key异或。
由于题目给出前7个字符“npuctf{”,所以我们md5异或可以反向推出前28个状态,但是我们一共需要233个状态,所以只能爆破seed,通过截取第一次输出的key的前32位,并逆向next,就可以得到第一个state,所以只需要爆破32位的seed,让state[0]和得到的一样就可以了。
但是这样爆破就没什么意思了,在网上找了找,据说给出的最后的“}”是可以推seed的,那我们就来看看到底是什么情况。
assert len(flag) == 26
assert flag[:7] == 'npuctf{'
assert flag[-1] == '}'
def getramdanbits(self):
if self.flag == 233:
self.generate()
self.flag = 0
bits = self.Next(self.state[self.flag]).to_bytes(4, 'big')
self.flag += 1
return bits
注意到flag的长度只有26,那么key只调用getramdanbits()104次,class里的self.flag最大只有104,那么getramdanbits()每次执行时,并不会对原有的state进行修改。
def generate(self):
for i in range(233):
y = (self.state[i] & 0x80000000) | (self.state[(i + 1) % 233] & 0x7fffffff)
temp = y >> 1
temp ^= self.state[(i + 130) % 233]
if y & 1:
temp ^= 0x9908f23f
self.state[i] = temp
我们发现state生成的时候,使用了i,i+1,i+130,当i=103时,state[103]最后会和state[233%233]=state[0]进行异或,那么我们只需要先求出state[103],然后和state[0]异或,这样得到了y>>1,然后用上一题提到的方法恢复y的最后一位,这样我们就可以得到由oldstate[103]的第0位和oldstate[104]的1-31位组成的y。我们知道,第一次生成的state,相邻两个是有关系的,即
def srand(self):
for i in range(232):
self.state[i + 1] = 1812433253 * (self.state[i] ^ (self.state[i] >> 27)) - i
self.state[i + 1] &= 0xffffffff
因此,我们只需要猜测104的第0位,然后用103和104的关系进行验证,就可以筛选出正确的oldstate[104]。之后,我们仿照MT19937中,对init的逆向,对本题的初始化过程逆向,就可以得到seed,进而得到所有的初始state。最后我们把所有可见字符的md5值列举出来,让输出文件和key异或,然后在枚举的md值中找到对应字符,就可以获得最终的flag。
from binascii import hexlify, unhexlify
from hashlib import md5
from Crypto.Util.number import bytes_to_long,long_to_bytes
from gmpy2 import invert
XOR = lambda s1, s2: bytes([x1 ^ x2 for x1, x2 in zip(s1, s2)])
def inverse_right(res, shift, mask=0xffffffff, bits=32):
tmp = res
for i in range(bits // shift):
tmp = res ^ tmp >> shift & mask
return tmp
def inverse_left(res, shift, mask=0xffffffff, bits=32):
tmp = res
for i in range(bits // shift):
tmp = res ^ tmp << shift & mask
return tmp
# 逆srand
def recover(last):
n = 1 << 32
inv = invert(1812433253, n)
for i in range(103, -1, -1):
last = ((last + i) * inv) % n
last = inverse_right(last, 27)
return last
# 逆next
def Last(tmp):
tmp = inverse_right(tmp, 18, 0x34adf670)
tmp = inverse_left(tmp, 15, 0xefc65400)
tmp = inverse_left(tmp, 7, 0x9ddf4680)
tmp = inverse_right(tmp, 11)
return tmp
# 逆state0
cip = b'cef4876036ee8b55aa59bca043725bf3'
assert len(cip) == 32
cip = unhexlify(cip)
tmp = md5('n'.encode()).digest()
key = XOR(tmp, cip)
newstate0 = Last(bytes_to_long(key[:4]))
print(newstate0)
# 逆state103
key = unhexlify(b'b223dde6450ba6198e90e14de107aaf2')
tmp = md5('}'.encode()).digest()
key = XOR(tmp, key)
newstate103 = Last(bytes_to_long(key[-4:]))
print(newstate103)
# 得到oldstate104的两种情况
y_1 = newstate103 ^ newstate0
print(bin(y_1))
if newstate103 >> 31 == 1:
y = (y_1 ^ 0x9908b0df) << 1 + 1
else:
y = y_1 << 1
print(bin(y))
oldstate104_1 = (y << 1) >> 1
oldstate104_2 = oldstate104_1 + (1 << 31)
# 验证哪种是对的 这部分有点丑hhh
n = 1 << 32
inv = invert(1812433253, n)
t1 = oldstate104_1
t1 = ((t1 + 103) * inv) % n
t1 = inverse_right(t1, 27)
t2 = oldstate104_1
t2 = ((t2 + 103) * inv) % n
t2 = inverse_right(t2, 27)
oldstate104=0
if t1 & 0x80000000 == y & 0x80000000:
if t2 & 0x80000000 == y & 0x80000000:
print('both are right')
else:
oldstate104=oldstate104_1
else:
oldstate104=oldstate104_2
#恢复seed
seed=recover(oldstate104)
print(seed)
class mt73991:
def __init__(self, seed):
self.state = [seed] + [0] * 232
self.flag = 0
self.srand()
self.generate()
def srand(self):
for i in range(232):
self.state[i + 1] = 1812433253 * (self.state[i] ^ (self.state[i] >> 27)) - i
self.state[i + 1] &= 0xffffffff
def generate(self):
for i in range(233):
y = (self.state[i] & 0x80000000) | (self.state[(i + 1) % 233] & 0x7fffffff)
temp = y >> 1
temp ^= self.state[(i + 130) % 233]
if y & 1:
temp ^= 0x9908f23f
self.state[i] = temp
def getramdanbits(self):
if self.flag == 233:
self.generate()
self.flag = 0
#原题的代码下面的这行没int转换
bits = int(self.Next(self.state[self.flag])).to_bytes(4, 'big')
self.flag += 1
return bits
def Next(self, tmp):
tmp ^= (tmp >> 11)
tmp ^= (tmp << 7) & 0x9ddf4680
tmp ^= (tmp << 15) & 0xefc65400
tmp ^= (tmp >> 18) & 0x34adf670
return tmp
random = mt73991(seed)
#储存可见字符的md5
S=b''
for i in range(0x20,0x7f):
S+=long_to_bytes(i)
S_list = []
for i in S:
tmp = md5(chr(i).encode()).digest()
S_list.append(tmp)
def decrypt(key, cipher):
tmp = XOR(key, unhexlify(cipher))
for i in range(len(S)):
if S_list[i] == tmp:
return long_to_bytes(S[i])
return b''
#开始解密
c = open('./cipher.txt', 'rb').read()
flag = b''
for i in range(len(c)//32):
key = b''.join([random.getramdanbits() for _ in range(4)])
cipher = c[i*32:(i+1)*32]
flag += decrypt(key,cipher)
print(flag)
#reference https://am473ur.com/writeup-for-crypto-problems-in-npuctf-2020/#mersenne_twister
#reference https://0xdktb.top/2020/04/19/WriteUp-NPUCTF-Crypto/
以上就是这道题的所有内容,还是比较麻烦的一道题(当然可能是因为自己想了一天才想明白,还乱嫖脚本改都不改,导致seed一开始求错了),以后对于这类魔改算法的题目,需要对照原来的算法仔细分析,找到不同的地方,再进行接下来的事情。
Comments NOTHING