578 lines
18 KiB
Raw Normal View History

2018-02-02 22:22:43 +01:00
# ReEncoder.py - script allowing for recursive encoding detection, decoding and then re-encoding.
# To be used for instance in fuzzing purposes. Imagine you want to fuzz XML parameters within
# **PaReq** packet of 3DSecure standard. This packet has been ZLIB compressed, then Base64 encoded,
2018-04-16 17:41:59 +02:00
# then URLEncoded. In order to modify the inner XML you would need to peel off those encoding layers
# and then reapply them in reversed order. This script allows you to do that in an automated manner.
2018-02-02 22:22:43 +01:00
# If the input string's length is divisible by 4, Base64 will be able to decode it - thus, the script
# would wrongly assume it has been encoded using Base64. The same goes for Hex decoding.
# In order to tackle this issue, the script builds up a tree of possible encoding schemes and then evaluates
# that tree by choosing the best fitting encodings path (the one scoring the most points, based on the
# resulting text's length, entropy and printability).
# Requires:
# - jwt
# - anytree
2021-10-24 23:11:42 +02:00
# Mariusz Banach, 2018
2018-02-02 22:22:43 +01:00
import os
2018-02-02 22:22:43 +01:00
import re
import sys
import jwt
import zlib
2018-02-02 22:22:43 +01:00
import math
import base64
import urllib
import string
import anytree
import binascii
from collections import Counter
# =============================================
2018-02-02 22:22:43 +01:00
class ReEncoder:
# Detects stacked encodings, peels them off and re-applies them in reverse
# order. The members below are class-level configuration constants.
# Switch this to show verbose information about the decoding process.
# It is read by ReEncoder.log(); it is currently enabled by default.
DEBUG = True
# Selectors for the preferred final output format, accepted by decode().
PREFER_AUTO = 0 # Automatically determine final output format
PREFER_TEXT = 1 # Prefer text/printable final output format
PREFER_BINARY = 2 # Prefer binary final output format
class Utils:
    """Small helpers shared by the encoders."""

    @staticmethod
    def isBinaryData(data):
        """Heuristically decide whether *data* looks binary rather than textual.

        A symbol counts as textual when it is LF, CR, or lies in the range
        0x20..0x7f; everything else counts as binary. The data is considered
        binary when at least 10% of its symbols (and never fewer than one)
        are binary.

        :param data: ``str`` or ``bytes`` to inspect.
        :return: ``True`` when the data looks binary, ``False`` otherwise.
        """
        percOfBinaryToAssume = 0.10

        binary = 0
        for d in data:
            # Iterating bytes on Python 3 yields ints, iterating str yields
            # one-character strings - support both.
            c = d if isinstance(d, int) else ord(d)
            if c in (10, 13) or 0x20 <= c <= 0x7f:
                continue
            binary += 1

        # The truncated threshold used to be 0 for inputs shorter than ten
        # symbols, which classified every short (even empty) input as binary.
        # Requiring at least one binary symbol fixes that.
        threshold = max(1, int(percOfBinaryToAssume * len(data)))
        return binary >= threshold
2018-02-02 22:22:43 +01:00
# ============================================================
class Encoder:
    """Interface implemented by every encoding scheme.

    Subclasses override all four methods; the base versions only signal
    that the method has not been implemented.
    """

    def name(self):
        """Return the scheme's name."""
        raise NotImplementedError()

    def check(self, data):
        """Tell whether *data* looks like it was encoded with this scheme."""
        raise NotImplementedError()

    def encode(self, data):
        """Return *data* encoded with this scheme."""
        raise NotImplementedError()

    def decode(self, data):
        """Return *data* with this scheme's encoding removed."""
        raise NotImplementedError()
class NoneEncoder(Encoder):
    """Identity scheme: encoding and decoding return the data unchanged."""

    def name(self):
        return 'None'

    def check(self, data):
        # Any non-empty input qualifies.
        return bool(data)

    def encode(self, data):
        return data

    def decode(self, data):
        return data
class URLEncoder(Encoder):
    """Percent-encoding (URL encoding) built on urllib.quote/unquote."""

    def name(self):
        return 'URLEncoder'

    def check(self, data):
        """Report whether *data* appears to be URL-encoded.

        True when unquoting changes the data and quoting the result gives the
        input back, or when the data contains at least one %XX escape.
        """
        unquoted = urllib.unquote(data)
        roundTrips = urllib.quote(unquoted) == data and (unquoted != data)
        if roundTrips:
            return True

        hasEscape = re.search(r'(?:%[0-9a-f]{2})+', data, re.I)
        return True if hasEscape else False

    def encode(self, data):
        return urllib.quote(data)

    def decode(self, data):
        return urllib.unquote(data)
class HexEncoder(Encoder):
    """Hexadecimal encoding built on binascii.hexlify/unhexlify."""

    def name(self):
        return 'HexEncoded'

    def check(self, data):
        """Report whether *data* can be hex-decoded.

        The input must consist solely of hex digits and have an even length.
        Odd-length input (or input with a trailing newline, which ``$`` used
        to let through) previously passed this check and then made decode()
        raise.
        """
        if len(data) % 2 != 0:
            return False
        return re.match(r'^[0-9a-f]+\Z', data, re.I) is not None

    def encode(self, data):
        return binascii.hexlify(data).strip()

    def decode(self, data):
        # The decoded payload is returned untouched: stripping it would drop
        # leading/trailing whitespace bytes that belong to the payload, so
        # re-encoding would no longer reproduce the original input.
        return binascii.unhexlify(data)
class Base64Encoder(Encoder):
    """Standard-alphabet Base64 built on the base64 module."""

    def name(self):
        return 'Base64'

    def check(self, data):
        """Report whether *data* is canonical standard-alphabet Base64.

        The data must survive a decode/encode round trip unchanged and match
        the Base64 shape (groups of four symbols with optional padding).
        Input that cannot be decoded yields False instead of raising.
        """
        try:
            roundTrips = base64.b64encode(base64.b64decode(data)) == data
        except (TypeError, ValueError):
            # b64decode signals malformed input with TypeError or with
            # binascii.Error (a ValueError subclass), depending on the
            # Python version.
            return False

        if not roundTrips:
            return False

        shape = r'^(?:[A-Za-z0-9+/]{4})*(?:[A-Za-z0-9+/]{2}==|[A-Za-z0-9+/]{3}=|[A-Za-z0-9+/]{4})$'
        return re.match(shape, data) is not None

    def encode(self, data):
        return base64.b64encode(data)

    def decode(self, data):
        return base64.b64decode(data)
class Base64URLSafeEncoder(Encoder):
    """URL-safe Base64 ('-' and '_' alphabet) built on the base64 module."""

    def name(self):
        return 'Base64URLSafe'

    def check(self, data):
        """Report whether *data* is canonical URL-safe Base64.

        The data must survive a decode/encode round trip unchanged and match
        the URL-safe Base64 shape. Input that cannot be decoded yields False
        instead of raising.
        """
        try:
            roundTrips = base64.urlsafe_b64encode(base64.urlsafe_b64decode(data)) == data
        except (TypeError, ValueError):
            # Malformed input is reported with TypeError or binascii.Error
            # (a ValueError subclass), depending on the Python version.
            return False

        if not roundTrips:
            return False

        shape = r'^(?:[A-Za-z0-9\-_]{4})*(?:[A-Za-z0-9\-_]{2}==|[A-Za-z0-9\-_]{3}=|[A-Za-z0-9\-_]{4})$'
        return re.match(shape, data) is not None

    def encode(self, data):
        return base64.urlsafe_b64encode(data)

    def decode(self, data):
        return base64.urlsafe_b64decode(data)
class JWTEncoder(Encoder):
    """JSON Web Token scheme backed by the third-party ``jwt`` module."""

    # Key handed to jwt.encode() when re-encoding; empty by default.
    secret = ''

    def name(self):
        return 'JWT'

    def check(self, data):
        """Report whether *data* can be decoded as a JWT (signature unchecked)."""
        # NOTE(review): ``verify = False`` is kept exactly as in the original
        # call; confirm that the installed jwt module still accepts it.
        try:
            jwt.decode(data, verify = False)
        except jwt.exceptions.DecodeError:
            return False
        return True

    def encode(self, data):
        return jwt.encode(data, JWTEncoder.secret)

    def decode(self, data):
        return jwt.decode(data, verify = False)
class ZlibEncoder(Encoder):
    """ZLIB compression built on the zlib module."""

    def name(self):
        return 'ZLIB'

    def check(self, data):
        """Report whether *data* is a zlib stream.

        Only binary-looking data is considered. It must decompress and then
        compress back to the very same bytes. Data that is not a valid zlib
        stream yields False instead of raising.
        """
        if not ReEncoder.Utils.isBinaryData(data):
            return False

        try:
            return zlib.compress(zlib.decompress(data)) == data
        except zlib.error:
            return False

    def encode(self, data):
        return zlib.compress(data)

    def decode(self, data):
        return zlib.decompress(data)
2018-02-02 22:22:43 +01:00
# ============================================================
# Bounds the detection loop: generateEncodingTree() performs at most
# len(self.encoders) * MaxEncodingDepth encoder checks.
MaxEncodingDepth = 20

def __init__(self):
    """Instantiate all encoders and index them by name."""
    self.encodings = []

    # NOTE(review): the entries of this tuple were missing from the reviewed
    # source (the literal was left unterminated). They are reconstructed here
    # from the encoder classes, in class definition order - confirm the order,
    # since detection tries the encoders in exactly this sequence.
    self.encoders = (
        ReEncoder.URLEncoder(),
        ReEncoder.HexEncoder(),
        ReEncoder.Base64Encoder(),
        ReEncoder.Base64URLSafeEncoder(),
        ReEncoder.JWTEncoder(),
        ReEncoder.ZlibEncoder(),

        # None must always be the last detector
        ReEncoder.NoneEncoder(),
    )

    self.data = ''
    self.preferredOutputFormat = ReEncoder.PREFER_AUTO

    # Name -> encoder instance lookup used by decode() and encode().
    self.encodersMap = {}
    for encoder in self.encoders:
        self.encodersMap[encoder.name()] = encoder
@staticmethod
def log(text):
    """Print *text* when ReEncoder.DEBUG is enabled; otherwise do nothing."""
    # NOTE(review): the body of this ``if`` was missing from the reviewed
    # source; printing the message is the evident intent.
    if ReEncoder.DEBUG:
        print(text)
def verifyEncodings(self, encodings):
    """Validate a user-supplied list of encodings.

    Each item must be either the name of a registered encoder or a class
    derived from ReEncoder.Encoder.

    :param encodings: iterable of encoder names and/or encoder classes.
    :raises Exception: on an unknown name or an item of an unsupported type.
    """
    for encoder in encodings:
        if isinstance(encoder, str):
            if encoder not in self.encodersMap:
                raise Exception("Passed unknown encoder's name.")
            continue

        # The arguments used to be reversed (issubclass(Encoder, encoder)),
        # which rejected every real encoder class and accepted the base
        # classes of Encoder instead. issubclass() raises TypeError for
        # non-class values; treat those as unknown too.
        try:
            known = issubclass(encoder, ReEncoder.Encoder)
        except TypeError:
            known = False

        if not known:
            raise Exception("Passed encoder is of unknown type.")
def generateEncodingTree(self, data):
    """Peel encoding layers off *data*, yielding one tuple per tree node.

    Yields ``(encoderName, data, isBranch)`` tuples:
      * ``('None', currentData, True)`` - an alternative "not actually
        encoded" branch, emitted when a Base64/Hex detection is ambiguous
        (the length alone makes the data decodable);
      * ``(encoderName, decodedData, False)`` - a layer that was peeled off.

    The loop stops when a full pass over the encoders peels nothing, or
    after len(self.encoders) * ReEncoder.MaxEncodingDepth checks.

    NOTE(review): the bodies of the 'None' check and of the final
    "nothing peeled" check, the arguments of two format() calls and the
    ``break`` after a successful peel were missing from the reviewed source
    and are reconstructed here - confirm against the upstream file.
    """
    step = 0
    maxSteps = len(self.encoders) * ReEncoder.MaxEncodingDepth

    peeledOff = 0
    currData = data

    while step < maxSteps:
        peeledBefore = peeledOff

        for encoder in self.encoders:
            step += 1
            encoderName = encoder.name()

            ReEncoder.log('[.] Trying: {} (peeled off: {}). Current form: "{}"'.format(encoderName, peeledOff, currData))

            if not encoder.check(currData):
                continue

            # The identity encoder accepts any non-empty input; it never
            # counts as a peeled layer.
            if encoderName == 'None':
                continue

            if encoderName.lower().startswith('base64') and (len(currData) % 4 == 0):
                ReEncoder.log('[.] Unclear situation whether input ({}) is Base64 encoded. Branching.'.format(
                    currData
                ))
                yield ('None', currData, True)

            if encoderName.lower().startswith('hex') and (len(currData) % 2 == 0):
                ReEncoder.log('[.] Unclear situation whether input ({}) is Hex encoded. Branching.'.format(
                    currData
                ))
                yield ('None', currData, True)

            ReEncoder.log('[+] Detected encoder: {}'.format(encoderName))

            currData = encoder.decode(currData)
            yield (encoderName, currData, False)

            peeledOff += 1

            # Restart detection from the first encoder on the new data.
            break

        # A full pass without progress means no more layers can be peeled.
        if (peeledOff - peeledBefore) == 0:
            break
def formEncodingCandidates(self, root):
    """Collect scoring candidates from the deepest level of the tree.

    :param root: root anytree node of the encoding tree.
    :return: list of ``[name, decoded, 0.0]`` entries, one per node on the
        deepest tree level; the third item is the score, initially zero.
    """
    levels = list(anytree.LevelOrderGroupIter(root))

    candidates = []
    for node in levels[-1]:
        ReEncoder.log('[.] Candidate for best decode using {}: "{}"...'.format(
            node.name, node.decoded[:20]
        ))
        candidates.append([node.name, node.decoded, 0.0])

    return candidates
def entropy(data, unit='natural'):
base = {
'shannon' : 2.,
'natural' : math.exp(1),
'hartley' : 10.
if len(data) <= 1:
return 0
counts = Counter()
for d in data:
counts[d] += 1
probs = [float(c) / len(data) for c in counts.values()]
probs = [p for p in probs if p > 0.]
ent = 0
for p in probs:
if p > 0.:
ent -= p * math.log(p, base[unit])
return ent
def evaluateEncodingTree(self, root):
    """Choose the winning list of encodings for the tree rooted at *root*.

    Both the printable-oriented and the binary-oriented winners are
    computed. The one matching self.preferredOutputFormat is returned; in
    automatic mode the higher-scoring one wins, with ties going to the
    printable path.

    :return: list of encoder names forming the winning decode path.
    """
    (printableEncodings, printableCandidate) = self.evaluateEncodingTreePicker(root, False)
    (binaryEncodings, binaryCandidate) = self.evaluateEncodingTreePicker(root, True)

    if self.preferredOutputFormat == ReEncoder.PREFER_TEXT:
        ReEncoder.log('Returning text/printable output format as requested preferred one.')
        return printableEncodings

    if self.preferredOutputFormat == ReEncoder.PREFER_BINARY:
        ReEncoder.log('Returning binary output format as requested preferred one.')
        return binaryEncodings

    # Automatic mode: compare the scores (third item of each candidate).
    ReEncoder.log('Trying to determine preferred output format...')
    ReEncoder.log('[>] Winning printable encoding path scored: {} points.'.format(
        printableCandidate[2]
    ))
    ReEncoder.log('[>] Winning binary encoding path scored: {} points.'.format(
        binaryCandidate[2]
    ))

    if printableCandidate[2] >= binaryCandidate[2]:
        ReEncoder.log('\n[+] Choosing all-time winner: PRINTABLE output format.')
        return printableEncodings

    ReEncoder.log('\n[+] Choosing all-time winner: BINARY output format.')
    return binaryEncodings
def evaluateEncodingTreePicker(self, root, preferBinary):
    """Score the candidates and return the best decode path.

    :param root: root anytree node of the encoding tree.
    :param preferBinary: score in favour of binary output when true,
        printable output otherwise.
    :return: ``(encodings, winningCandidate)`` - the encoder names on the
        winning path (without 'None' entries) and the winning
        ``[name, decoded, points]`` candidate.
    """
    candidates = self._evaluateEncodingTreeWorker(root, preferBinary)

    # Pick the first candidate with the strictly highest score.
    maxCandidate = 0
    for i, candidate in enumerate(candidates):
        if float(candidate[2]) > candidates[maxCandidate][2]:
            maxCandidate = i

    winningCandidate = candidates[maxCandidate]

    # Locate the tree node(s) holding the winning decoded value.
    winningPaths = anytree.search.findall_by_attr(
        root,
        name = 'decoded',
        value = winningCandidate[1]
    )

    ReEncoder.log('[?] Other equally good candidate paths:\n' + str(winningPaths))
    winningPath = winningPaths[0]

    preferred = 'binary' if preferBinary else 'printable'
    ReEncoder.log('[+] Winning decode path for {} output is:\n{}'.format(
        preferred,
        winningPath
    ))

    # Compare node *names*: the previous ``x != 'None'`` compared a Node
    # object with a string, which is always unequal and filtered nothing.
    encodings = [x.name for x in winningPath.path if x.name != 'None']

    return (encodings, winningCandidate)
def _evaluateEncodingTreeWorker(self, root, preferBinary = False):
    """Assign a score to every candidate on the deepest tree level.

    The score is the sum of: points for the share of printable (or
    unreadable) characters, entropy points and length points, each scaled
    by a weight. With *preferBinary* the weights favour unreadable
    characters instead of printable ones.

    :return: list of ``[name, decoded, points]`` candidates.
    """
    weights = {
        'unreadableChars' : 0.0,
        'printableChars' : 9.6,
        'entropyScore' : 4.0,
        'length' : 1.0,
    }

    if preferBinary:
        weights['unreadableChars'] = 24.0
        weights['printableChars'] = 0.0
        weights['entropyScore'] = 2.666667

    candidates = self.formEncodingCandidates(root)

    for i, candidate in enumerate(candidates):
        name = candidate[0]
        decoded = candidate[1]
        points = float(candidate[2])

        total = len(decoded)
        entropyValue = ReEncoder.entropy(decoded)
        printables = sum(1 for x in decoded if x in string.printable)
        nonprintables = total - printables

        ReEncoder.log('[=] Evaluating candidate: {} (entropy: {}, data: "{}")'.format(
            name, entropyValue, decoded
        ))

        # Step 1: Points for the printable / unreadable percentage.
        # Empty data has no characters to rate; without this guard the
        # ratios below raised ZeroDivisionError.
        if total:
            printablePoints = float(weights['printableChars']) * (float(printables) / float(total))
            nonPrintablePoints = float(weights['unreadableChars']) * (float(nonprintables) / float(total))
        else:
            printablePoints = 0.0
            nonPrintablePoints = 0.0

        # Step 2: If this candidate is the 'None' alternative of a Base64
        # candidate that follows it, its length and entropy get slightly
        # lower weights.
        if name.lower() == 'none' \
            and len(candidates) > i+1 \
            and candidates[i+1][0].lower().startswith('base64'):
            ReEncoder.log('\tAdding fine for being base64')
            entropyPoints = entropyValue * (weights['entropyScore'] * 0.666666)
            lengthPoints = float(total) * (weights['length'] * 0.666666)
        else:
            entropyPoints = entropyValue * weights['entropyScore']
            lengthPoints = float(total) * weights['length']

        # Step 3: Add character-class points and entropy points.
        if printables > nonprintables:
            ReEncoder.log('More printable chars than binary ones.')
            ReEncoder.log('\tAdding {} points for printable entropy.'.format(entropyPoints))
            ReEncoder.log('\tAdding {} points for printable characters.'.format(printablePoints))
            points += printablePoints
        else:
            ReEncoder.log('More binary chars than printable ones.')
            ReEncoder.log('\tAdding {} points for binary entropy.'.format(entropyPoints))
            ReEncoder.log('\tAdding {} points for binary characters.'.format(nonPrintablePoints))
            points += nonPrintablePoints

        points += entropyPoints

        # Step 4: Add points for length
        ReEncoder.log('\tAdding {} points for length.'.format(lengthPoints))
        points += lengthPoints

        ReEncoder.log('\tScored in total: {} points.'.format(points))
        candidates[i][2] = points

    return candidates
2018-02-02 22:22:43 +01:00
def getWinningDecodePath(self, root):
    """Return the winning encoder names for *root*, without 'None' entries."""
    path = []
    for encoderName in self.evaluateEncodingTree(root):
        if encoderName != 'None':
            path.append(encoderName)
    return path
2018-02-02 22:22:43 +01:00
def process(self, data):
    """Detect the encodings applied to *data* and store them in self.encodings.

    Builds an anytree tree from the tuples produced by
    generateEncodingTree(), logs it, and stores the winning decode path.
    """
    root = anytree.Node('None', decoded = data)
    prev = root

    for (name, curr, branch) in self.generateEncodingTree(data):
        ReEncoder.log('[*] Generator returned: ("{}", "{}", {})'.format(
            name, curr[:20], str(branch)
        ))

        currNode = anytree.Node(name, parent = prev, decoded = curr)

        # NOTE(review): the reviewed source showed only ``if branch:``
        # followed by ``prev = currNode``, with lines evidently missing in
        # between. A branch node is the "not actually encoded" alternative
        # and has to stay a leaf next to the decoded node - the scoring code
        # looks for a 'None' candidate directly followed by a Base64 one on
        # the same tree level - so only a peeled layer becomes the new
        # parent. Confirm against the upstream file.
        if not branch:
            prev = currNode

    for pre, fill, node in anytree.RenderTree(root):
        if node.name != 'None':
            ReEncoder.log("%s%s (%s)" % (pre, node.name, node.decoded[:20].decode('ascii', 'ignore')))

    self.encodings = self.getWinningDecodePath(root)
    ReEncoder.log('[+] Selected encodings: {}'.format(str(self.encodings)))
def decode(self, data, preferredOutputFormat = PREFER_AUTO, encodings = None):
    """Peel all encoding layers off *data*.

    :param data: the encoded input.
    :param preferredOutputFormat: one of ReEncoder.PREFER_AUTO,
        PREFER_TEXT or PREFER_BINARY; steers automatic detection.
    :param encodings: optional list of encoder names, applied in order.
        When empty or omitted, the encodings are detected automatically.
    :return: the decoded data.
    :raises Exception: when *preferredOutputFormat* is not a known selector.
    """
    self.preferredOutputFormat = preferredOutputFormat

    knownFormats = (ReEncoder.PREFER_AUTO, ReEncoder.PREFER_TEXT, ReEncoder.PREFER_BINARY)
    if preferredOutputFormat not in knownFormats:
        raise Exception('Unknown preferred output format specified in decode(): {}'.format(
            preferredOutputFormat
        ))

    if not encodings:
        # Nothing given explicitly - detect; process() fills self.encodings.
        self.process(data)
    else:
        self.encodings = encodings

    # The selected encodings are remembered on the instance, so a later
    # encode() call can re-apply them in reverse.
    for encoderName in self.encodings:
        data = self.encodersMap[encoderName].decode(data)

    return data
def encode(self, data, encodings = []):
    """Re-apply the encodings to *data* in reverse order.

    A non-empty *encodings* list replaces self.encodings first; otherwise
    the encodings already stored on the instance are used.

    :return: the encoded data.
    """
    if encodings:
        self.encodings = encodings

    encoded = data
    # Layers are applied innermost-first, i.e. in reverse decode order.
    for encoderName in reversed(self.encodings):
        encoded = self.encodersMap[encoderName].encode(encoded)

    return encoded
2018-02-02 22:22:43 +01:00
def main(argv):
    """Demo entry point: decode the input, modify it and re-encode it.

    :param argv: command-line arguments; ``argv[1]`` is either the text to
        decode or the path of a file holding it. With any other argument
        count a usage line is printed and a built-in sample is used.
    """
    # Sample 1: ZLIB -> Base64 -> URLEncode
    sample = 'eJzzSM3JyVcozy%2FKSVFIK8rPVQhKdc1Lzk9JLVIEAIr8Cck%3D'

    # Sample 2: URLEncode -> Base64 -> HexEncode
    #sample = '4a5451344a5459314a545a6a4a545a6a4a545a6d4a5449774a5463334a545a6d4a5463794a545a6a4a5459304a5449784a5449774a544e684a544a6b4a544935'

    if len(argv) != 2:
        print('Usage: reencode.py <text|file>')
        print('Using sample: "{}"'.format(sample))
        text = sample
    else:
        text = argv[1]
        if os.path.isfile(text):
            # The context manager closes the handle, which was previously
            # left open.
            with open(text, 'rb') as f:
                text = f.read()

    decoder = ReEncoder()
    decoded = decoder.decode(text)
    print('(1) DECODED TEXT: "{}"'.format(decoded))

    decoded = 'FOO ' + decoded + ' BAR'
    print('\n(2) TO BE ENCODED TEXT: "{}"'.format(decoded))

    # encode() re-applies the encodings selected by the decode() call above.
    decoded = decoder.encode(decoded)
    print('(3) ENCODED FORM: "{}"'.format(decoded))
# Run the demo only when executed as a script.
# NOTE(review): the body of this guard was missing from the reviewed source;
# main() takes the argument vector, so sys.argv is passed.
if __name__ == '__main__':
    main(sys.argv)