# mgeeky-Penetration-Testing-.../phishing/phishing-HTML-linter.py

#!/usr/bin/python3

import os, sys, re
import string
import argparse
import yaml
import textwrap
import json
from urllib import parse
from bs4 import BeautifulSoup

# File extensions (lower-case, leading dot included) that mark a link target
# as a potentially executable file.
# NOTE(review): '.wsl' may have been meant as '.wsf'/'.wsh' - confirm.
executable_extensions = [
    '.exe',
    '.dll',
    '.lnk',
    '.scr',
    '.sys',
    '.ps1',
    '.bat',
    '.js',
    '.jse',
    '.vbs',
    '.vba',
    '.vbe',
    '.wsl',
    '.cpl',
]


# Global runtime settings. This is the single definition of `options`; an
# earlier one-key duplicate that was immediately overwritten has been removed.
options = {
    'debug': False,
    'verbose': False,
    'nocolor': False,
    'log': sys.stderr,
    'format': 'text',
}

class Logger:
    """Small ANSI-colouring logger that writes to a stream or appends to a file.

    The destination is taken from ``options['log']``: a file-like object is
    written to directly, a string is treated as a path opened in append mode,
    and the literal string ``'none'`` silences output.
    """

    # ANSI SGR foreground codes keyed by colour name.
    # NOTE(review): 38 is normally the extended-colour introducer rather than a
    # grey; kept unchanged - confirm whether 90 (bright black) was intended.
    colors_map = {
        'red':      31,
        'green':    32,
        'yellow':   33,
        'blue':     34,
        'magenta':  35,
        'cyan':     36,
        'white':    37,
        'grey':     38,
    }

    # Default colour per message mode. Both 'info ' (the padded default of
    # out()) and 'info' (what info() passes) are listed so either spelling
    # resolves to green instead of silently falling back to grey.
    colors_dict = {
        'error': colors_map['red'],
        'trace': colors_map['magenta'],
        'info ': colors_map['green'],
        'info': colors_map['green'],
        'debug': colors_map['grey'],
        'other': colors_map['grey'],
    }

    # Defaults copied into every instance; never mutated through an instance.
    options = {
        'debug': False,
        'verbose': False,
        'nocolor': False,
        'log': sys.stderr,
    }

    def __init__(self, opts=None):
        """Create a logger whose settings are the class defaults overlaid with *opts*.

        Each instance gets its own dictionary, so configuring one logger does
        not change another one (or the class-level defaults).
        """
        self.options = dict(Logger.options)
        if opts:
            self.options.update(opts)

    @staticmethod
    def with_color(c, s):
        """Wrap *s* in the ANSI escape sequence for numeric colour code *c*."""
        return "\x1b[%dm%s\x1b[0m" % (c, s)

    def colored(self, txt, col):
        """Return *txt* coloured with the named colour *col*, or unchanged when colours are off."""
        if self.options['nocolor']:
            return txt

        return Logger.with_color(Logger.colors_map[col], txt)

    # Invocation:
    #   out(txt, fd, mode='info ', color=None, noprefix=False, newline=True,
    #       nocolor=False, force_stdout=False)
    @staticmethod
    def out(txt, fd, mode='info ', **kwargs):
        """Write one message to *fd*.

        txt   -- message; non-strings are converted with str(), None is ignored.
        fd    -- file-like object, a path to append to, or 'none' to discard.
        mode  -- tag shown (upper-cased, in brackets) before the message.
        Keyword options: color, noprefix, newline, nocolor, force_stdout.

        Raises Exception when *fd* is None.
        """
        if txt is None or fd == 'none':
            return
        if fd is None:
            raise Exception('[ERROR] Logging descriptor has not been specified!')

        args = {
            'color': None,
            'noprefix': False,
            'newline': True,
            'nocolor': False,
        }
        args.update(kwargs)

        if not isinstance(txt, str):
            txt = str(txt)

        txt = txt.replace('\t', ' ' * 4)

        if args['nocolor']:
            col = None
        elif args['color']:
            col = args['color']
            if isinstance(col, str) and col in Logger.colors_map:
                col = Logger.colors_map[col]
        else:
            # Plain lookup: unknown modes fall back to grey without adding
            # entries to the shared table.
            col = Logger.colors_dict.get(mode, Logger.colors_map['grey'])

        tag = ('[%s] ' % mode).upper() if mode else ''

        prefix = ''
        if tag and not args['noprefix']:
            if args['nocolor']:
                prefix = tag
            else:
                prefix = Logger.with_color(Logger.colors_dict['other'], tag)

        nl = '\n' if args['newline'] else ''

        # Honour the flag's value, not merely its presence.
        if args.get('force_stdout'):
            fd = sys.stdout

        if isinstance(fd, str):
            # File output is never coloured; the tag is always written.
            with open(fd, 'a') as f:
                f.write(tag + txt + nl)
            return

        if args['nocolor']:
            fd.write(prefix + txt + nl)
        else:
            fd.write(prefix + Logger.with_color(col, txt) + nl)

    # Info shall be used as an ordinary logging facility, for every desired output.
    def info(self, txt, forced=False, **kwargs):
        """Log *txt* when forced, in verbose/debug mode, or when logging to a file path."""
        kwargs['nocolor'] = self.options['nocolor']
        log = self.options['log']
        logs_to_file = isinstance(log, str) and log != 'none'

        if forced or self.options['verbose'] or self.options['debug'] or logs_to_file:
            Logger.out(txt, log, 'info', **kwargs)

    def text(self, txt, **kwargs):
        """Log *txt* without any mode prefix."""
        kwargs['noprefix'] = True
        kwargs['nocolor'] = self.options['nocolor']
        Logger.out(txt, self.options['log'], '', **kwargs)

    def dbg(self, txt, **kwargs):
        """Log *txt* only when the 'debug' option is set."""
        if self.options['debug']:
            kwargs['nocolor'] = self.options['nocolor']
            Logger.out(txt, self.options['log'], 'debug', **kwargs)

    def err(self, txt, **kwargs):
        """Log *txt* as an error."""
        kwargs['nocolor'] = self.options['nocolor']
        Logger.out(txt, self.options['log'], 'error', **kwargs)

    def fatal(self, txt, **kwargs):
        """Log *txt* as an error and terminate the process with exit status 1."""
        kwargs['nocolor'] = self.options['nocolor']
        Logger.out(txt, self.options['log'], 'error', **kwargs)
        os._exit(1)

# Module-wide logger built from the `options` defaults defined above; the
# parser tests below call logger.colored() to highlight their context output.
logger = Logger(options)

class PhishingMailParser:
    def __init__(self, options):
        self.options = options
        self.results = {}

    def parse(self, html):
        """Run every check against *html* and return only the checks that reported something.

        The markup is parsed with BeautifulSoup (lxml backend) and stored on the
        instance; each check's outcome is recorded in ``self.results`` under its
        title, and the returned dict keeps just the non-empty outcomes.
        """
        self.html = html
        self.soup = BeautifulSoup(html, features="lxml")

        # (title, check) pairs, executed in exactly this order.
        checks = (
            ('Embedded Images', self.testEmbeddedImages),
            ('Images without ALT', self.testImagesNoAlt),
            ('Masqueraded Links', self.testMaskedLinks),
            ('Use of underline tag <u>', self.testUnderlineTag),
            ('HTML code in <a> link tags', self.testLinksWithHtmlCode),
            ('<a href="..."> URL contained GET parameter', self.testLinksWithGETParams),
            ('<a href="..."> URL contained GET parameter with URL', self.testLinksWithGETParamsBeingURLs),
            ('<a href="..."> URL pointed to an executable file', self.testLinksWithDangerousExtensions),
        )

        for title, check in checks:
            self.results[title] = check()

        return {title: found for title, found in self.results.items() if found}

    @staticmethod
    def context(tag):
        s = str(tag)

        if len(s) < 100:
            return s

        beg = s[:50]
        end = s[-50:]

        return f'{beg}...{end}'

    def testUnderlineTag(self):
        links = self.soup('u')

        if not links or len(links) == 0:
            return []

        desc = 'Underline tags are recognized by anti-spam filters and trigger additional rule (Office365: 67856001), but by their own shouldnt impact spam score.'
        result = f'- Found {len(links)} <u> tags. This is not by itself an indication of spam, but is known to trigger some rules (like Office365: 67856001)\n'

        context = ''
        for i in range(len(links)):
            context += str(links[i]) + '\n\n'
            if i > 5: break

        return {
            'description' : desc,
            'context' : context,
            'analysis' : result
        }

    def testLinksWithHtmlCode(self):
        links = self.soup('a')

        desc = 'Links that contain HTML code within <a> ... </a> may increase Spam score heavily'
        context = ''
        result = ''
        num = 0
        embed = ''

        for link in links:       
            text = str(link)
            pos = text.find('>')
            code = text[pos+1:]

            m = re.search(r'(.+)<\s*/\s*a\s*>', code, re.I)
            if m:
                code = m.group(1)

            suspicious = '<' in text and '>' in text

            if suspicious:
                num += 1

                if num < 5:
                    N = 70
                    tmp = text[:N]

                    if len(text) > N:
                        tmp += ' ... ' + text[-N:]

                    context += tmp + '\n'

                    code2 = PhishingMailParser.context(code)
                    context += f"\n\t- {logger.colored('Code inside of <a> tag:','red')}\n\t\t" + logger.colored(code2, 'yellow') + '\n'

        if num > 0:
            result += f'- Found {num} <a> tags that contained HTML code inside!\n'
            result +=  '\t  Links conveying HTML code within <a> ... </a> may greatly increase message Spam score!\n'

        if len(result) == 0:
            return []

        return {
            'description' : desc,
            'context' : context,
            'analysis' : result
        }


    def testLinksWithGETParams(self):
        links = self.soup('a')

        desc = 'Links with URLs containing GET parameters will be noticed by anti-spam filters resulting in another rule triggering on message (Office365: 21615005).'
        context = ''
        result = ''
        num = 0
        embed = ''

        for link in links:
            try:
                href = link['href']
            except:
                continue
        
            text = link.getText()
            params = dict(parse.parse_qsl(parse.urlsplit(href).query))

            if len(params) > 0:
                num += 1

                if num < 5:
                    context += PhishingMailParser.context(link) + '\n'
                    hr = href[:90]
                    pos = hr.find('?')
                    hr = hr[:pos] + logger.colored(hr[pos:], 'yellow')

                    context += f'\thref = "{hr}"\n'
                    context += f'\ttext = "{text[:90]}"\n\n'

        if num > 0:
            result += f'- Found {num} <a> tags with href="..." URLs containing GET params.\n'
            result +=  '\t  Links with URLs that contain GET params might trigger anti-spam rule (Office365: 21615005)\n'

        if len(result) == 0:
            return []

        return {
            'description' : desc,
            'context' : context,
            'analysis' : result
        }

    def testLinksWithDangerousExtensions(self):
        links = self.soup('a')

        desc = 'Message contained <a> tags with href="..." links pointing to a file with dangerous extension (such as .exe)'
        context = ''
        result = ''
        num = 0
        embed = ''

        for link in links:
            try:
                href = link['href']
            except:
                continue
        
            text = link.getText()
            parsed = parse.urlsplit(href)

            if '.' not in parsed.path: 
                continue

            pos = parsed.path.rfind('.')
            if pos == -1:
                continue

            extension = parsed.path.lower()[pos:]

            if extension in executable_extensions:
                num += 1

                if num < 5:
                    context += PhishingMailParser.context(link) + '\n'
                    hr = href[:90]
                    pos1 = hr.lower().find(extension.lower())

                    hr = logger.colored(hr[:pos1], 'yellow') + logger.colored(hr[pos1:pos1+len(extension)], 'red') + logger.colored(hr[pos1+len(extension):], 'yellow')

                    context += f'\thref = "{hr}"\n'
                    context += f'\ttext = "{text[:90]}"\n\n'

                    context += f'\tExtension matched: {logger.colored(extension, "red")}\n'

        if num > 0:
            result += f'- Found {num} <a> tags with href="..." URLs pointing to files with dangerous extensions (such as .exe).\n'
            result +=  '\t  Links with URLs that point to potentially executable files might trigger anti-spam rule (Office365: 460985005)\n'

        if len(result) == 0:
            return []

        return {
            'description' : desc,
            'context' : context,
            'analysis' : result
        }

    def testLinksWithGETParamsBeingURLs(self):
        links = self.soup('a')

        desc = 'Links with URLs that contain GET parameters pointing to another URL, will trigger two Office365 anti-spam rules (Office365: 45080400002).'
        context = ''
        result = ''
        num = 0
        embed = ''

        for link in links:
            try:
                href = link['href']
            except:
                continue
        
            text = link.getText()
            params = dict(parse.parse_qsl(parse.urlsplit(href).query))

            url = re.compile(r'((http|https)\:\/\/)?[a-zA-Z0-9\.\/\?\:@\-_=#]+\.([a-zA-Z]){2,6}([a-zA-Z0-9\.\&\/\?\:@\-_=#])*')

            if len(params) > 0:
                for k, v in params.items():
                    m = url.match(v)

                    if m:
                        urlmatched = m.group(1)
                        num += 1

                        if num < 5:
                            context += PhishingMailParser.context(link) + '\n'

                            hr = href[:90]
                            hr = logger.colored(hr, 'yellow')

                            context += f'\thref = "{hr}"\n'
                            context += f'\ttext = "{text[:90]}"\n\n'
                            context += f'\thref URL GET parameter contained another URL:\n\t\t' + logger.colored(v, "red") + '\n'

        if num > 0:
            result += f'- Found {num} <a> tags with href="..." URLs containing GET params containing another URL.\n'
            result +=  '\t  Links with URLs that contain GET params with another URL might trigger anti-spam rule (Office365: 45080400002)\n'

        if len(result) == 0:
            return []

        return {
            'description' : desc,
            'context' : context,
            'analysis' : result
        }


    def testMaskedLinks(self):
        links = self.soup('a')

        desc = 'Links that masquerade their href= attribute by displaying different link are considered harmful and will increase Spam score.'
        context = ''
        result = ''
        num = 0
        embed = ''

        for link in links:
            try:
                href = link['href']
            except:
                continue
        
            text = link.getText()

            url = re.compile(r'((http|https)\:\/\/)?[a-zA-Z0-9\.\/\?\:@\-_=#]+\.([a-zA-Z]){2,6}([a-zA-Z0-9\.\&\/\?\:@\-_=#])*')

            m1 = url.match(href)
            m2 = url.match(text)

            if m1 and m2:
                num += 1

                if num < 5:
                    context += PhishingMailParser.context(link) + '\n'
                    context += f'\thref = "{logger.colored(href[:90],"green")}"\n'
                    context += f'\ttext = "{logger.colored(text[:90],"red")}"\n\n'

        if num > 0:
            result += f'- Found {num} <a> tags that masquerade their href="" links with text!\n'
            result +=  '\t  Links that try to hide underyling URL are harmful and will be considered as Spam!\n'

        if len(result) == 0:
            return []

        return {
            'description' : desc,
            'context' : context,
            'analysis' : result
        }

    def testImagesNoAlt(self):
        images = self.soup('img')

        desc = 'Images without ALT="value" attribute may increase Spam scorage.'
        context = ''
        result = ''
        num = 0
        embed = ''

        for img in images:
            src = img['src']
            alt = ''

            try:
                alt = img['alt']
            except:
                pass

            if alt == '':
                num += 1

                if num < 5:
                    context += PhishingMailParser.context(img) + '\n\n'

        if num > 0:
            result += f'- Found {num} <img> tags without ALT="value" attribute.\n'
            result +=  '\t  Images without alternate text set in their attribute may increase Spam score\n'

        if len(result) == 0:
            return []

        return {
            'description' : desc,
            'context' : context,
            'analysis' : result
        }

    def testEmbeddedImages(self):
        images = self.soup('img')

        desc = 'Embedded images can increase Spam Confidence Level (SCL) in Office365 by 4 points. Embedded images are those with <img src="data:image/png;base64,<BLOB>"/> . They should be avoided.'
        context = ''
        result = ''
        num = 0
        embed = ''

        for img in images:
            src = img['src']
            alt = ''

            try:
                alt = img['alt']
            except:
                pass

            if src.lower().startswith('data:image/'):
                if len(embed) == 0:
                    embed = src[:30]

                num += 1

                if num < 5:
                    if len(alt) > 0:
                        context += f'- ALT="{alt}": ' + PhishingMailParser.context(img) + '\n'
                    else:
                        ctx = PhishingMailParser.context(img)
                        pos = ctx.find('data:')
                        pos2 = ctx.find('"', pos+1)

                        ctx = logger.colored(ctx[:pos], 'yellow') + logger.colored(ctx[pos:pos2], 'red') + logger.colored(ctx[pos2:], 'yellow')

                        context += ctx + '\n'

        if num > 0:
            result += f'- Found {num} <img> tags with embedded image ({embed}).\n'
            result +=  '\t  Embedded images increase Office365 SCL (Spam) level by 4 points!\n'

        if len(result) == 0:
            return []

        return {
            'description' : desc,
            'context' : context,
            'analysis' : result
        }


def printOutput(out):
    """Print the parser's findings in the format chosen by options['format'].

    'json' dumps *out* as one JSON document; 'text' prints a numbered section
    per finding with its description (wrapped at 80 columns), context and
    analysis. Any other format prints nothing.
    """
    fmt = options['format']

    if fmt == 'json':
        print(json.dumps(out))
        return

    if fmt != 'text':
        return

    for num, (k, v) in enumerate(out.items(), start=1):
        # Indent every bullet of the analysis by one tab.
        analysis = v['analysis'].strip().replace('- ', '\t- ')
        context = v['context'].strip()

        wrapped = textwrap.wrap(
            v['description'],
            width = 80,
            initial_indent = '',
            subsequent_indent = '    '
        )
        desc = '\n'.join(wrapped).strip()

        print(f'''
------------------------------------------
({num}) Test: {logger.colored(k, "cyan")}

{logger.colored("DESCRIPTION", "blue")}: 

    {desc}

{logger.colored("CONTEXT", "blue")}: 

    {context}

{logger.colored("ANALYSIS", "blue")}: 

    {analysis}
''')

def opts(argv):
    """Parse the command line and merge the result into the global options.

    argv -- full argument vector, program name first (as in sys.argv); the
            arguments after the program name are what gets parsed.

    Returns the argparse namespace; its values are also copied into the
    module-level `options` dictionary.
    """
    global options

    o = argparse.ArgumentParser(
        usage = 'phishing-HTML-linter.py [options] <file.html>'
    )

    req = o.add_argument_group('Required arguments')
    req.add_argument('file', help = 'Input HTML file')

    # Parse the vector we were given rather than implicitly re-reading sys.argv.
    args = o.parse_args(argv[1:])
    options.update(vars(args))
    return args

def main(argv):
    """Entry point: lint the HTML file named on the command line and print the findings.

    argv -- full argument vector, program name first (as in sys.argv).

    Returns False when argument parsing yields nothing, otherwise None.
    """
    args = opts(argv)
    if not args:
        return False

    print('''
    :: Phishing HTML Linter
    Shows you bad smells in your HTML code that will get your mails busted!
    Mariusz Banach / mgeeky
''')

    with open(args.file, 'rb') as f:
        raw = f.read()

    # Decode leniently: a stray non-UTF-8 byte in the message should not abort
    # the whole run; undecodable bytes become U+FFFD.
    html = raw.decode('utf-8', errors='replace')

    p = PhishingMailParser({})
    ret = p.parse(html)

    if ret:
        printOutput(ret)
    else:
        print('\n[+] Congrats! Your message does not have any known bad smells that could trigger anti-spam rules.\n')

# Run the linter only when executed as a script; main() receives the full
# argument vector, program name included.
if __name__ == '__main__':
    main(sys.argv)
Added phishing directory. 2021-10-17 15:22:05 +02:00			`#!/usr/bin/python3`

			`import os, sys, re`
			`import string`
			`import argparse`
			`import yaml`
updated decode-spam-headers.py 2021-10-28 21:12:23 +02:00			`import textwrap`
Added phishing directory. 2021-10-17 15:22:05 +02:00			`import json`
heavily updated decode-spam-headers.py and phishing-HTML-linter.py 2021-10-29 03:22:54 +02:00			`from urllib import parse`
Added phishing directory. 2021-10-17 15:22:05 +02:00			`from bs4 import BeautifulSoup`

			`options = {`
			`'format' : 'text',`
			`}`

heavily updated decode-spam-headers.py and phishing-HTML-linter.py 2021-10-29 03:22:54 +02:00			`executable_extensions = [`
			`'.exe',`
			`'.dll',`
			`'.lnk',`
			`'.scr',`
			`'.sys',`
			`'.ps1',`
			`'.bat',`
			`'.js',`
			`'.jse',`
			`'.vbs',`
			`'.vba',`
			`'.vbe',`
			`'.wsl',`
			`'.cpl',`
			`]`


			`options = {`
			`'debug': False,`
			`'verbose': False,`
			`'nocolor' : False,`
			`'log' : sys.stderr,`
			`'format' : 'text',`
			`}`

			`class Logger:`
			`colors_map = {`
			`'red': 31,`
			`'green': 32,`
			`'yellow': 33,`
			`'blue': 34,`
			`'magenta': 35,`
			`'cyan': 36,`
			`'white': 37,`
			`'grey': 38,`
			`}`

			`colors_dict = {`
			`'error': colors_map['red'],`
			`'trace': colors_map['magenta'],`
			`'info ': colors_map['green'],`
			`'debug': colors_map['grey'],`
			`'other': colors_map['grey'],`
			`}`

			`options = {}`

			`def __init__(self, opts = None):`
			`self.options.update(Logger.options)`
			`if opts != None and len(opts) > 0:`
			`self.options.update(opts)`

			`@staticmethod`
			`def with_color(c, s):`
			`return "\x1b[%dm%s\x1b[0m" % (c, s)`

			`def colored(self, txt, col):`
			`if self.options['nocolor']:`
			`return txt`

			`return Logger.with_color(Logger.colors_map[col], txt)`

			`# Invocation:`
			`# def out(txt, mode='info ', fd=None, color=None, noprefix=False, newline=True):`
			`@staticmethod`
			`def out(txt, fd, mode='info ', **kwargs):`
			`if txt == None or fd == 'none':`
			`return`
			`elif fd == None:`
			`raise Exception('[ERROR] Logging descriptor has not been specified!')`

			`args = {`
			`'color': None,`
			`'noprefix': False,`
			`'newline': True,`
			`'nocolor' : False`
			`}`
			`args.update(kwargs)`

			`if type(txt) != str:`
			`txt = str(txt)`

			`txt = txt.replace('\t', ' ' * 4)`

			`if args['nocolor']:`
			`col = ''`
			`elif args['color']:`
			`col = args['color']`
			`if type(col) == str and col in Logger.colors_map.keys():`
			`col = Logger.colors_map[col]`
			`else:`
			`col = Logger.colors_dict.setdefault(mode, Logger.colors_map['grey'])`

			`prefix = ''`
			`if mode:`
			`mode = '[%s] ' % mode`

			`if not args['noprefix']:`
			`if args['nocolor']:`
			`prefix = mode.upper()`
			`else:`
			`prefix = Logger.with_color(Logger.colors_dict['other'], '%s'`
			`% (mode.upper()))`

			`nl = ''`
			`if 'newline' in args:`
			`if args['newline']:`
			`nl = '\n'`

			`if 'force_stdout' in args:`
			`fd = sys.stdout`

			`if type(fd) == str:`
			`with open(fd, 'a') as f:`
			`prefix2 = ''`
			`if mode:`
			`prefix2 = '%s' % (mode.upper())`
			`f.write(prefix2 + txt + nl)`
			`f.flush()`

			`else:`
			`if args['nocolor']:`
			`fd.write(prefix + txt + nl)`
			`else:`
			`fd.write(prefix + Logger.with_color(col, txt) + nl)`

			`# Info shall be used as an ordinary logging facility, for every desired output.`
			`def info(self, txt, forced = False, **kwargs):`
			`kwargs['nocolor'] = self.options['nocolor']`
			`if forced or (self.options['verbose'] or \`
			`self.options['debug'] ) \`
			`or (type(self.options['log']) == str and self.options['log'] != 'none'):`
			`Logger.out(txt, self.options['log'], 'info', **kwargs)`

			`def text(self, txt, **kwargs):`
			`kwargs['noPrefix'] = True`
			`kwargs['nocolor'] = self.options['nocolor']`
			`Logger.out(txt, self.options['log'], '', **kwargs)`

			`def dbg(self, txt, **kwargs):`
			`if self.options['debug']:`
			`kwargs['nocolor'] = self.options['nocolor']`
			`Logger.out(txt, self.options['log'], 'debug', **kwargs)`

			`def err(self, txt, **kwargs):`
			`kwargs['nocolor'] = self.options['nocolor']`
			`Logger.out(txt, self.options['log'], 'error', **kwargs)`

			`def fatal(self, txt, **kwargs):`
			`kwargs['nocolor'] = self.options['nocolor']`
			`Logger.out(txt, self.options['log'], 'error', **kwargs)`
			`os._exit(1)`

			`logger = Logger(options)`

Added phishing directory. 2021-10-17 15:22:05 +02:00			`class PhishingMailParser:`
			`def __init__(self, options):`
			`self.options = options`
			`self.results = {}`

			`def parse(self, html):`
			`self.html = html`
			`self.soup = BeautifulSoup(html, features="lxml")`

heavily updated decode-spam-headers.py and phishing-HTML-linter.py 2021-10-29 03:22:54 +02:00			`self.results['Embedded Images'] = self.testEmbeddedImages()`
			`self.results['Images without ALT'] = self.testImagesNoAlt()`
			`self.results['Masqueraded Links'] = self.testMaskedLinks()`
			`self.results['Use of underline tag <u>'] = self.testUnderlineTag()`
			`self.results['HTML code in <a> link tags'] = self.testLinksWithHtmlCode()`
			`self.results['<a href="..."> URL contained GET parameter'] = self.testLinksWithGETParams()`
			`self.results['<a href="..."> URL contained GET parameter with URL'] = self.testLinksWithGETParamsBeingURLs()`
			`self.results['<a href="..."> URL pointed to an executable file'] = self.testLinksWithDangerousExtensions()`
Added phishing directory. 2021-10-17 15:22:05 +02:00
			`return {k: v for k, v in self.results.items() if v}`

			`@staticmethod`
			`def context(tag):`
			`s = str(tag)`

			`if len(s) < 100:`
			`return s`

			`beg = s[:50]`
			`end = s[-50:]`

			`return f'{beg}...{end}'`

updated decode-spam-headers.py 2021-10-28 21:12:23 +02:00			`def testUnderlineTag(self):`
			`links = self.soup('u')`

			`if not links or len(links) == 0:`
			`return []`

			`desc = 'Underline tags are recognized by anti-spam filters and trigger additional rule (Office365: 67856001), but by their own shouldnt impact spam score.'`
			`result = f'- Found {len(links)} <u> tags. This is not by itself an indication of spam, but is known to trigger some rules (like Office365: 67856001)\n'`

			`context = ''`
			`for i in range(len(links)):`
heavily updated decode-spam-headers.py and phishing-HTML-linter.py 2021-10-29 03:22:54 +02:00			`context += str(links[i]) + '\n\n'`
			`if i > 5: break`
updated decode-spam-headers.py 2021-10-28 21:12:23 +02:00
			`return {`
			`'description' : desc,`
			`'context' : context,`
			`'analysis' : result`
			`}`

heavily updated decode-spam-headers.py and phishing-HTML-linter.py 2021-10-29 03:22:54 +02:00			`def testLinksWithHtmlCode(self):`
			`links = self.soup('a')`

			`desc = 'Links that contain HTML code within <a> ... </a> may increase Spam score heavily'`
			`context = ''`
			`result = ''`
			`num = 0`
			`embed = ''`

			`for link in links:`
			`text = str(link)`
			`pos = text.find('>')`
			`code = text[pos+1:]`

			`m = re.search(r'(.+)<\s/\sa\s*>', code, re.I)`
			`if m:`
			`code = m.group(1)`

			`suspicious = '<' in text and '>' in text`

			`if suspicious:`
			`num += 1`

			`if num < 5:`
			`N = 70`
			`tmp = text[:N]`

			`if len(text) > N:`
			`tmp += ' ... ' + text[-N:]`

			`context += tmp + '\n'`

			`code2 = PhishingMailParser.context(code)`
			`context += f"\n\t- {logger.colored('Code inside of <a> tag:','red')}\n\t\t" + logger.colored(code2, 'yellow') + '\n'`

			`if num > 0:`
			`result += f'- Found {num} <a> tags that contained HTML code inside!\n'`
			`result += '\t Links conveying HTML code within <a> ... </a> may greatly increase message Spam score!\n'`

			`if len(result) == 0:`
			`return []`

			`return {`
			`'description' : desc,`
			`'context' : context,`
			`'analysis' : result`
			`}`


			`def testLinksWithGETParams(self):`
			`links = self.soup('a')`

			`desc = 'Links with URLs containing GET parameters will be noticed by anti-spam filters resulting in another rule triggering on message (Office365: 21615005).'`
			`context = ''`
			`result = ''`
			`num = 0`
			`embed = ''`

			`for link in links:`
			`try:`
			`href = link['href']`
			`except:`
			`continue`

			`text = link.getText()`
			`params = dict(parse.parse_qsl(parse.urlsplit(href).query))`

			`if len(params) > 0:`
			`num += 1`

			`if num < 5:`
			`context += PhishingMailParser.context(link) + '\n'`
			`hr = href[:90]`
			`pos = hr.find('?')`
			`hr = hr[:pos] + logger.colored(hr[pos:], 'yellow')`

			`context += f'\thref = "{hr}"\n'`
			`context += f'\ttext = "{text[:90]}"\n\n'`

			`if num > 0:`
			`result += f'- Found {num} <a> tags with href="..." URLs containing GET params.\n'`
			`result += '\t Links with URLs that contain GET params might trigger anti-spam rule (Office365: 21615005)\n'`

			`if len(result) == 0:`
			`return []`

			`return {`
			`'description' : desc,`
			`'context' : context,`
			`'analysis' : result`
			`}`

			`def testLinksWithDangerousExtensions(self):`
			`links = self.soup('a')`

			`desc = 'Message contained <a> tags with href="..." links pointing to a file with dangerous extension (such as .exe)'`
			`context = ''`
			`result = ''`
			`num = 0`
			`embed = ''`

			`for link in links:`
			`try:`
			`href = link['href']`
			`except:`
			`continue`

			`text = link.getText()`
			`parsed = parse.urlsplit(href)`

			`if '.' not in parsed.path:`
			`continue`

			`pos = parsed.path.rfind('.')`
			`if pos == -1:`
			`continue`

			`extension = parsed.path.lower()[pos:]`

			`if extension in executable_extensions:`
			`num += 1`

			`if num < 5:`
			`context += PhishingMailParser.context(link) + '\n'`
			`hr = href[:90]`
			`pos1 = hr.lower().find(extension.lower())`

			`hr = logger.colored(hr[:pos1], 'yellow') + logger.colored(hr[pos1:pos1+len(extension)], 'red') + logger.colored(hr[pos1+len(extension):], 'yellow')`

			`context += f'\thref = "{hr}"\n'`
			`context += f'\ttext = "{text[:90]}"\n\n'`

			`context += f'\tExtension matched: {logger.colored(extension, "red")}\n'`

			`if num > 0:`
			`result += f'- Found {num} <a> tags with href="..." URLs pointing to files with dangerous extensions (such as .exe).\n'`
			`result += '\t Links with URLs that point to potentially executable files might trigger anti-spam rule (Office365: 460985005)\n'`

			`if len(result) == 0:`
			`return []`

			`return {`
			`'description' : desc,`
			`'context' : context,`
			`'analysis' : result`
			`}`

			`def testLinksWithGETParamsBeingURLs(self):`
			`links = self.soup('a')`

			`desc = 'Links with URLs that contain GET parameters pointing to another URL, will trigger two Office365 anti-spam rules (Office365: 45080400002).'`
			`context = ''`
			`result = ''`
			`num = 0`
			`embed = ''`

			`for link in links:`
			`try:`
			`href = link['href']`
			`except:`
			`continue`

			`text = link.getText()`
			`params = dict(parse.parse_qsl(parse.urlsplit(href).query))`

			`url = re.compile(r'((http\|https)\:\/\/)?[a-zA-Z0-9\.\/\?\:@\-_=#]+\.([a-zA-Z]){2,6}([a-zA-Z0-9\.\&\/\?\:@\-_=#])*')`

			`if len(params) > 0:`
			`for k, v in params.items():`
			`m = url.match(v)`

			`if m:`
			`urlmatched = m.group(1)`
			`num += 1`

			`if num < 5:`
			`context += PhishingMailParser.context(link) + '\n'`

			`hr = href[:90]`
			`hr = logger.colored(hr, 'yellow')`

			`context += f'\thref = "{hr}"\n'`
			`context += f'\ttext = "{text[:90]}"\n\n'`
			`context += f'\thref URL GET parameter contained another URL:\n\t\t' + logger.colored(v, "red") + '\n'`

			`if num > 0:`
			`result += f'- Found {num} <a> tags with href="..." URLs containing GET params containing another URL.\n'`
			`result += '\t Links with URLs that contain GET params with another URL might trigger anti-spam rule (Office365: 45080400002)\n'`

			`if len(result) == 0:`
			`return []`

			`return {`
			`'description' : desc,`
			`'context' : context,`
			`'analysis' : result`
			`}`


Added phishing directory. 2021-10-17 15:22:05 +02:00			`def testMaskedLinks(self):`
			`links = self.soup('a')`

			`desc = 'Links that masquerade their href= attribute by displaying different link are considered harmful and will increase Spam score.'`
			`context = ''`
			`result = ''`
			`num = 0`
			`embed = ''`

			`for link in links:`
			`try:`
			`href = link['href']`
			`except:`
			`continue`

			`text = link.getText()`

			`url = re.compile(r'((http\|https)\:\/\/)?[a-zA-Z0-9\.\/\?\:@\-_=#]+\.([a-zA-Z]){2,6}([a-zA-Z0-9\.\&\/\?\:@\-_=#])*')`

			`m1 = url.match(href)`
			`m2 = url.match(text)`

			`if m1 and m2:`
			`num += 1`
heavily updated decode-spam-headers.py and phishing-HTML-linter.py 2021-10-29 03:22:54 +02:00
			`if num < 5:`
			`context += PhishingMailParser.context(link) + '\n'`
			`context += f'\thref = "{logger.colored(href[:90],"green")}"\n'`
			`context += f'\ttext = "{logger.colored(text[:90],"red")}"\n\n'`
Added phishing directory. 2021-10-17 15:22:05 +02:00
			`if num > 0:`
			`result += f'- Found {num} <a> tags that masquerade their href="" links with text!\n'`
			`result += '\t Links that try to hide underyling URL are harmful and will be considered as Spam!\n'`

			`if len(result) == 0:`
			`return []`

			`return {`
			`'description' : desc,`
			`'context' : context,`
			`'analysis' : result`
			`}`

			`def testImagesNoAlt(self):`
			`images = self.soup('img')`

			`desc = 'Images without ALT="value" attribute may increase Spam scorage.'`
			`context = ''`
			`result = ''`
			`num = 0`
			`embed = ''`

			`for img in images:`
			`src = img['src']`
			`alt = ''`

			`try:`
			`alt = img['alt']`
			`except:`
			`pass`

			`if alt == '':`
			`num += 1`
heavily updated decode-spam-headers.py and phishing-HTML-linter.py 2021-10-29 03:22:54 +02:00
			`if num < 5:`
			`context += PhishingMailParser.context(img) + '\n\n'`
Added phishing directory. 2021-10-17 15:22:05 +02:00
			`if num > 0:`
			`result += f'- Found {num} <img> tags without ALT="value" attribute.\n'`
			`result += '\t Images without alternate text set in their attribute may increase Spam score\n'`

			`if len(result) == 0:`
			`return []`

			`return {`
			`'description' : desc,`
			`'context' : context,`
			`'analysis' : result`
			`}`

			`def testEmbeddedImages(self):`
			`images = self.soup('img')`

			`desc = 'Embedded images can increase Spam Confidence Level (SCL) in Office365 by 4 points. Embedded images are those with <img src="data:image/png;base64,<BLOB>"/> . They should be avoided.'`
			`context = ''`
			`result = ''`
			`num = 0`
			`embed = ''`

			`for img in images:`
			`src = img['src']`
			`alt = ''`

			`try:`
			`alt = img['alt']`
			`except:`
			`pass`

			`if src.lower().startswith('data:image/'):`
			`if len(embed) == 0:`
			`embed = src[:30]`

			`num += 1`
heavily updated decode-spam-headers.py and phishing-HTML-linter.py 2021-10-29 03:22:54 +02:00
			`if num < 5:`
			`if len(alt) > 0:`
			`context += f'- ALT="{alt}": ' + PhishingMailParser.context(img) + '\n'`
			`else:`
			`ctx = PhishingMailParser.context(img)`
			`pos = ctx.find('data:')`
			`pos2 = ctx.find('"', pos+1)`

			`ctx = logger.colored(ctx[:pos], 'yellow') + logger.colored(ctx[pos:pos2], 'red') + logger.colored(ctx[pos2:], 'yellow')`

			`context += ctx + '\n'`
Added phishing directory. 2021-10-17 15:22:05 +02:00
			`if num > 0:`
			`result += f'- Found {num} <img> tags with embedded image ({embed}).\n'`
			`result += '\t Embedded images increase Office365 SCL (Spam) level by 4 points!\n'`

			`if len(result) == 0:`
			`return []`

			`return {`
			`'description' : desc,`
			`'context' : context,`
			`'analysis' : result`
			`}`


			`def printOutput(out):`
			`if options['format'] == 'text':`
			`width = 100`
			`num = 0`

			`for k, v in out.items():`
			`num += 1`
heavily updated decode-spam-headers.py and phishing-HTML-linter.py 2021-10-29 03:22:54 +02:00			`analysis = v['analysis'].strip()`
			`context = v['context'].strip()`
updated decode-spam-headers.py 2021-10-28 21:12:23 +02:00			`desc = '\n'.join(textwrap.wrap(`
			`v['description'],`
			`width = 80,`
			`initial_indent = '',`
			`subsequent_indent = ' '`
heavily updated decode-spam-headers.py and phishing-HTML-linter.py 2021-10-29 03:22:54 +02:00			`)).strip()`
Added phishing directory. 2021-10-17 15:22:05 +02:00
			`analysis = analysis.replace('- ', '\t- ')`

			`print(f'''`
			`------------------------------------------`
heavily updated decode-spam-headers.py and phishing-HTML-linter.py 2021-10-29 03:22:54 +02:00			`({num}) Test: {logger.colored(k, "cyan")}`

			`{logger.colored("DESCRIPTION", "blue")}:`
Added phishing directory. 2021-10-17 15:22:05 +02:00
updated decode-spam-headers.py 2021-10-28 21:12:23 +02:00			`{desc}`

heavily updated decode-spam-headers.py and phishing-HTML-linter.py 2021-10-29 03:22:54 +02:00			`{logger.colored("CONTEXT", "blue")}:`

Added phishing directory. 2021-10-17 15:22:05 +02:00			`{context}`

heavily updated decode-spam-headers.py and phishing-HTML-linter.py 2021-10-29 03:22:54 +02:00			`{logger.colored("ANALYSIS", "blue")}:`

Added phishing directory. 2021-10-17 15:22:05 +02:00			`{analysis}`
			`''')`

			`elif options['format'] == 'json':`
			`print(json.dumps(out))`

			`def opts(argv):`
			`global options`
			`global headers`

			`o = argparse.ArgumentParser(`
			`usage = 'phishing-HTML-linter.py [options] <file.html>'`
			`)`

			`req = o.add_argument_group('Required arguments')`
			`req.add_argument('file', help = 'Input HTML file')`

			`args = o.parse_args()`
heavily updated decode-spam-headers.py and phishing-HTML-linter.py 2021-10-29 03:22:54 +02:00			`options.update(vars(args))`
Added phishing directory. 2021-10-17 15:22:05 +02:00			`return args`

			`def main(argv):`
			`args = opts(argv)`
			`if not args:`
			`return False`

			`print('''`
			`:: Phishing HTML Linter`
			`Shows you bad smells in your HTML code that will get your mails busted!`
updates 2021-10-24 23:11:42 +02:00			`Mariusz Banach / mgeeky`
Added phishing directory. 2021-10-17 15:22:05 +02:00			`''')`

			`html = ''`
			`with open(args.file, 'rb') as f:`
			`html = f.read()`

			`p = PhishingMailParser({})`
			`ret = p.parse(html.decode())`

heavily updated decode-spam-headers.py and phishing-HTML-linter.py 2021-10-29 03:22:54 +02:00			`if len(ret) > 0:`
			`printOutput(ret)`

			`else:`
			`print('\n[+] Congrats! Your message does not have any known bad smells that could trigger anti-spam rules.\n')`
Added phishing directory. 2021-10-17 15:22:05 +02:00

			`if __name__ == '__main__':`
			`main(sys.argv)`