999 lines
38 KiB
Python
999 lines
38 KiB
Python
#!/usr/bin/python3
|
||
|
||
import os, sys, re
|
||
import string
|
||
import argparse
|
||
import yaml
|
||
import textwrap
|
||
import json
|
||
from urllib import parse
|
||
from bs4 import BeautifulSoup
|
||
|
||
# File extensions regarded as executable / script content. A link whose URL
# path ends with one of these is reported by
# PhishingMailParser.testLinksWithDangerousExtensions().
executable_extensions = [
    '.exe',
    '.dll',
    '.lnk',
    '.scr',
    '.sys',
    '.ps1',
    '.bat',
    '.js',
    '.jse',
    '.vbs',
    '.vba',
    '.vbe',
    '.wsl',
    '.cpl',
]

# Program-wide settings, defined once. They are handed to Logger below and
# extended with the parsed command-line arguments in opts().
options = {
    'debug': False,
    'verbose': False,
    'nocolor': False,
    'log': sys.stderr,
    'format': 'text',
}
|
||
|
||
class Logger:
    """Small ANSI-colouring logger that writes to a stream or appends to a file.

    The target is taken from ``options['log']``: a file-like object is
    written to directly, a string is treated as a path to append to, and
    the string ``'none'`` silences output.
    """

    # ANSI SGR codes by colour name.
    # NOTE(review): 38 is not a plain foreground colour in the SGR table;
    # kept unchanged so emitted escape sequences stay the same.
    colors_map = {
        'red': 31,
        'green': 32,
        'yellow': 33,
        'blue': 34,
        'magenta': 35,
        'cyan': 36,
        'white': 37,
        'grey': 38,
    }

    # Default text colour per message mode. Mode names are five characters
    # wide ('info ' is padded) so the "[MODE] " prefixes line up.
    colors_dict = {
        'error': colors_map['red'],
        'trace': colors_map['magenta'],
        'info ': colors_map['green'],
        'debug': colors_map['grey'],
        'other': colors_map['grey'],
    }

    # Class-level overrides; copied into every instance, never mutated by it.
    options = {}

    # Fallbacks for the keys the instance methods read.
    _defaults = {
        'debug': False,
        'verbose': False,
        'nocolor': False,
        'log': sys.stderr,
    }

    def __init__(self, opts = None):
        """Create a logger with its own private copy of the settings.

        :param opts: optional mapping with 'debug', 'verbose', 'nocolor'
            and 'log' entries; missing keys fall back to the defaults.
        """
        # A per-instance dict: updating the class attribute in place would
        # leak one logger's settings into every other logger.
        self.options = dict(Logger._defaults)
        self.options.update(Logger.options)
        if opts:
            self.options.update(opts)

    @staticmethod
    def with_color(c, s):
        """Wrap *s* in the ANSI escape sequence for numeric colour code *c*."""
        return "\x1b[%dm%s\x1b[0m" % (c, s)

    def colored(self, txt, col):
        """Return *txt* coloured with the named colour *col*.

        When the 'nocolor' option is set, *txt* is returned untouched.
        Raises KeyError for a colour name missing from ``colors_map``.
        """
        if self.options['nocolor']:
            return txt

        return Logger.with_color(Logger.colors_map[col], txt)

    # Invocation:
    #   def out(txt, mode='info ', fd=None, color=None, noprefix=False, newline=True):
    @staticmethod
    def out(txt, fd, mode='info ', **kwargs):
        """Write one message to *fd*.

        :param txt: message; converted with str(), tabs become four spaces.
            ``None`` is silently ignored.
        :param fd: file-like object, a path to append to, or 'none'.
        :param mode: message kind used for the "[MODE] " prefix and for the
            default colour lookup in ``colors_dict``.
        :param kwargs: 'color' (name or numeric code), 'noprefix',
            'newline', 'nocolor', and 'force_stdout' (its mere presence
            redirects the message to sys.stdout).
        :raises Exception: when *fd* is None.
        """
        if txt is None or fd == 'none':
            return
        if fd is None:
            raise Exception('[ERROR] Logging descriptor has not been specified!')

        args = {
            'color': None,
            'noprefix': False,
            'newline': True,
            'nocolor' : False
        }
        args.update(kwargs)

        txt = str(txt).replace('\t', ' ' * 4)

        # Resolve the text colour. An unknown colour name falls back to the
        # mode default instead of reaching the "%d" format as a string.
        col = args['color']
        if isinstance(col, str):
            col = Logger.colors_map.get(col)
        if not col:
            # .get() rather than .setdefault(): a lookup must not add
            # entries to the shared class table.
            col = Logger.colors_dict.get(mode, Logger.colors_map['grey'])

        label = ('[%s] ' % mode).upper() if mode else ''

        prefix = ''
        if label and not args['noprefix']:
            if args['nocolor']:
                prefix = label
            else:
                prefix = Logger.with_color(Logger.colors_dict['other'], label)

        nl = '\n' if args['newline'] else ''

        if 'force_stdout' in args:
            fd = sys.stdout

        if isinstance(fd, str):
            # Log files never receive colour codes; the mode label is
            # written regardless of 'noprefix', as before.
            with open(fd, 'a') as f:
                f.write(label + txt + nl)
            return

        if args['nocolor']:
            fd.write(prefix + txt + nl)
        else:
            fd.write(prefix + Logger.with_color(col, txt) + nl)

    # Info shall be used as an ordinary logging facility, for every desired output.
    def info(self, txt, forced = False, **kwargs):
        """Log an informational message.

        Emitted when *forced*, when 'verbose' or 'debug' is enabled, or
        when the log target is a file path other than 'none'.
        """
        kwargs['nocolor'] = self.options['nocolor']
        target = self.options['log']
        to_file = isinstance(target, str) and target != 'none'

        if forced or self.options['verbose'] or self.options['debug'] or to_file:
            # 'info ' (padded) is the key present in colors_dict.
            Logger.out(txt, target, 'info ', **kwargs)

    def text(self, txt, **kwargs):
        """Write *txt* without any mode prefix."""
        kwargs['noprefix'] = True
        kwargs['nocolor'] = self.options['nocolor']
        Logger.out(txt, self.options['log'], '', **kwargs)

    def dbg(self, txt, **kwargs):
        """Log a debug message; a no-op unless the 'debug' option is set."""
        if self.options['debug']:
            kwargs['nocolor'] = self.options['nocolor']
            Logger.out(txt, self.options['log'], 'debug', **kwargs)

    def err(self, txt, **kwargs):
        """Log an error message."""
        kwargs['nocolor'] = self.options['nocolor']
        Logger.out(txt, self.options['log'], 'error', **kwargs)

    def fatal(self, txt, **kwargs):
        """Log an error message and terminate the process with status 1."""
        kwargs['nocolor'] = self.options['nocolor']
        Logger.out(txt, self.options['log'], 'error', **kwargs)
        os._exit(1)
|
||
|
||
# Module-wide logger instance; the parser and printOutput() below use its
# colored() helper to highlight parts of their report.
logger = Logger(options)
|
||
|
||
class PhishingMailParser:
|
||
|
||
#
|
||
# Based on:
|
||
# https://journeys.autopilotapp.com/blog/email-spam-trigger-words/
|
||
# https://www.activecampaign.com/blog/spam-words
|
||
# https://blog.hubspot.com/blog/tabid/6307/bid/30684/the-ultimate-list-of-email-spam-trigger-words.aspx
|
||
#
|
||
Suspicious_Words = {
|
||
'Manipulative': (
|
||
'creating unnecessary urgency or pressure',
|
||
(
|
||
"Act now", "Action", "Apply now", "Apply online", "Buy", "Buy direct", "Call", "Call now", "Click here",
|
||
"Clearance", "Click here", "Do it today", "Don't delete", "Drastically reduced", "Exclusive deal", "Expire",
|
||
"Get", "Get it now", "Get started now", "Important information regarding", "Instant", "Limited time",
|
||
"New customers only", "Now only", "Offer expires", "Once in a lifetime", "Order now", "Please read",
|
||
"Special promotion", "Take action", "This won't last", "Urgent", "While stocks last"
|
||
)
|
||
),
|
||
|
||
'Needy' : (
|
||
'sounding desperate or exaggerated claims',
|
||
(
|
||
"All-new", "Bargain", "Best price", "Bonus", "Email marketing", "Free", "For instant access", "Free gift",
|
||
"Free trial", "Have you been turned down?", "Great offer", "Join millions of Americans", "Incredible deal",
|
||
"Prize", "Satisfaction guaranteed", "Will not believe your eyes"
|
||
)
|
||
),
|
||
|
||
'Sleazy' : (
|
||
'being too pushy',
|
||
(
|
||
"As seen on", "Click here", "Click below", "Deal", "Direct email", "Direct marketing", "Do it today",
|
||
"Order now", "Order today", "Unlimited", "What are you waiting for?", "Visit our website"
|
||
)
|
||
),
|
||
|
||
'Cheap' : (
|
||
'no pre-qualifications, everybody wins',
|
||
(
|
||
"Acceptance", "Access", "Avoid bankruptcy", "Boss", "Cancel", "Card accepted", "Certified",
|
||
"Cheap", "Compare", "Compare rates", "Congratulations", "Credit card offers", "Cures", "Dear ",
|
||
"Dear friend", "Drastically reduced", "Easy terms", "Free grant money", "Free hosting", "Free info",
|
||
"Free membership", "Friend", "Get out of debt", "Giving away", "Guarantee", "Guaranteed",
|
||
"Have you been turned down?", "Hello", "Information you requested", "Join millions", "No age restrictions",
|
||
"No catch", "No experience", "No obligation", "No purchase necessary", "No questions asked",
|
||
"No strings attached", "Offer", "Opportunity", "Save big", "Winner", "Winning", "Won", "You are a winner!",
|
||
"You've been selected!"
|
||
)
|
||
),
|
||
|
||
'Far-fetched' : (
|
||
'statements that are too good to be true',
|
||
(
|
||
"Additional income", "All-natural", "Amazing", "Be your own boss", "Big bucks", "Billion",
|
||
"Billion dollars", "Cash", "Cash bonus", "Consolidate debt and credit", "Consolidate your debt",
|
||
"Double your income", "Earn", "Earn cash", "Earn extra cash", "Eliminate bad credit", "Eliminate debt",
|
||
"Extra", "Fantastic deal", "Financial freedom", "Financially independent", "Free investment", "Free money",
|
||
"Get paid", "Home", "Home-based", "Income", "Increase sales", "Increase traffic", "Lose", "Lose weight",
|
||
"Money back", "No catch", "No fees", "No hidden costs", "No strings attached", "Potential earnings",
|
||
"Pure profit", "Removes wrinkles", "Reverses aging", "Risk-free", "Serious cash", "Stop snoring",
|
||
"Vacation", "Vacation offers", "Weekend getaway", "Weight loss", "While you sleep", "Work from home"
|
||
)
|
||
),
|
||
|
||
'Exaggeration' : (
|
||
'exaggerated claims and promises',
|
||
(
|
||
"100% more", "100% free", "100% satisfied", "Additional income", "Be your own boss", "Best price",
|
||
"Big bucks", "Billion", "Cash bonus", "Cents on the dollar", "Consolidate debt", "Double your cash",
|
||
"Double your income", "Earn extra cash", "Earn money", "Eliminate bad credit", "Extra cash", "Extra income",
|
||
"Expect to earn", "Fast cash", "Financial freedom", "Free access", "Free consultation", "Free gift",
|
||
"Free hosting", "Free info", "Free investment", "Free membership", "Free money", "Free preview", "Free quote",
|
||
"Free trial", "Full refund", "Get out of debt", "Get paid", "Giveaway", "Guaranteed", "Increase sales",
|
||
"Increase traffic", "Incredible deal", "Lower rates", "Lowest price", "Make money", "Million dollars", "Miracle",
|
||
"Money back", "Once in a lifetime", "One time", "Pennies a day", "Potential earnings", "Prize",
|
||
"Promise", "Pure profit", "Risk-free", "Satisfaction guaranteed", "Save big money", "Save up to", "Special promotion",
|
||
)
|
||
),
|
||
|
||
'Urgency' : (
|
||
'create unnecessary urgency and pressure',
|
||
(
|
||
"Act now", "Apply now", "Become a member", "Call now", "Click below", "Click here", "Get it now",
|
||
"Do it today", "Don’t delete", "Exclusive deal", "Get started now", "Important information regarding",
|
||
"Information you requested", "Instant", "Limited time", "New customers only", "Order now", "Please read",
|
||
"See for yourself", "Sign up free", "Take action", "This won’t last", "Urgent", "What are you waiting for?",
|
||
"While supplies last", "Will not believe your eyes", "Winner", "Winning", "You are a winner", "You have been selected",
|
||
|
||
)
|
||
),
|
||
|
||
'Spammy' : (
|
||
'shady, spammy, or unethical behavior',
|
||
(
|
||
"Bulk email", "Buy direct", "Cancel at any time", "Check or money order", "Congratulations", "Confidentiality",
|
||
"Cures", "Dear friend", "Direct email", "Direct marketing", "Hidden charges", "Human growth hormone", "Internet marketing",
|
||
"Lose weight", "Mass email", "Meet singles", "Multi-level marketing", "No catch", "No cost", "No credit check",
|
||
"No fees", "No gimmick", "No hidden costs", "No hidden fees", "No interest", "No investment", "No obligation",
|
||
"No purchase necessary", "No questions asked", "No strings attached", "Not junk", "Notspam", "Obligation",
|
||
"Passwords", "Requires initial investment", "Social security number", "This isn’t a scam", "This isn’t junk",
|
||
"This isn’t spam", "Undisclosed", "Unsecured credit", "Unsecured debt", "Unsolicited", "Valium",
|
||
"Viagra", "Vicodin", "We hate spam", "Weight loss", "Xanax",
|
||
)
|
||
),
|
||
|
||
'Jargon' : (
|
||
'jargon or legalese',
|
||
(
|
||
"Accept credit cards", "All new", "As seen on", "Bargain", "Beneficiary", "Billing", "Bonus",
|
||
"Cards accepted", "Cash", "Certified", "Cheap", "Claims", "Clearance", "Compare rates", "Credit card offers",
|
||
"Deal", "Debt", "Discount", "Fantastic", "In accordance with laws", "Income", "Investment", "Join millions",
|
||
"Lifetime", "Loans", "Luxury", "Marketing solution", "Message contains", "Mortgage rates", "Name brand",
|
||
"Offer", "Online marketing", "Opt in", "Pre-approved", "Quote", "Rates", "Refinance", "Removal", "Reserves the right",
|
||
"Score", "Search engine", "Sent in compliance", "Subject to", "Terms and conditions", "Trial", "Unlimited",
|
||
"Warranty", "Web traffic", "Work from home",
|
||
)
|
||
),
|
||
|
||
'Shady' : (
|
||
'ethically or legally questionable behavior',
|
||
(
|
||
"Addresses", "Beneficiary", "Billing", "Casino", "Celebrity", "Collect child support", "Copy DVDs",
|
||
"Fast viagra delivery", "Hidden", "Human growth hormone", "In accordance with laws", "Investment",
|
||
"Junk", "Legal", "Life insurance", "Loan", "Lottery", "Luxury car", "Medicine", "Meet singles", "Message contains",
|
||
"Miracle", "Money", "Multi-level marketing", "Nigerian", "Offshore", "Online degree", "Online pharmacy", "Passwords",
|
||
"Refinance", "Request", "Rolex", "Score", "Social security number", "Spam", "This isn't spam", "Undisclosed recipient",
|
||
"University diplomas", "Unsecured credit", "Unsolicited", "US dollars", "Valium", "Viagra", "Vicodin",
|
||
"Warranty", "Xanax"
|
||
)
|
||
),
|
||
|
||
"Commerce" : (
|
||
"",
|
||
(
|
||
"As seen on", "Buy", "Buy direct", "Buying judgments", "Clearance", "Order", "Order status", "Orders shipped by shopper",
|
||
)
|
||
),
|
||
|
||
"Personal" : (
|
||
"",
|
||
(
|
||
"Dig up dirt on friends", "Meet singles", "Score with babes", "XXX", "Near you",
|
||
)
|
||
),
|
||
|
||
"Employment" : (
|
||
"",
|
||
(
|
||
"Additional income", "Be your own boss", "Compete for your business", "Double your", "Earn $", "Earn extra cash",
|
||
"Earn per week", "Expect to earn", "Extra income", "Home based", "Home employment", "Homebased business", "Income from home",
|
||
"Make $", "Make money", "Money making", "Online biz opportunity", "Online degree", "Opportunity",
|
||
"Potential earnings", "University diplomas", "While you sleep", "Work at home", "Work from home",
|
||
)
|
||
),
|
||
|
||
"Financial - General" : (
|
||
"",
|
||
(
|
||
"$$$", "Affordable", "Bargain", "Beneficiary", "Best price", "Big bucks", "Cash", "Cash bonus", "Cashcashcash",
|
||
"Cents on the dollar", "Cheap", "Check", "Claims", "Collect", "Compare rates", "Cost", "Credit", "Credit bureaus",
|
||
"Discount", "Earn", "Easy terms", "F r e e", "Fast cash", "For just $XXX", "Hidden assets", "hidden charges",
|
||
"Income", "Incredible deal", "Insurance", "Investment", "Loans", "Lowest price", "Million dollars", "Money",
|
||
"Money back", "Mortgage", "Mortgage rates", "No cost", "No fees", "One hundred percent free", "Only $", "Pennies a day",
|
||
"Price", "Profits", "Pure profit", "Quote", "Refinance", "Save $", "Save big money", "Save up to", "Serious cash",
|
||
"Subject to credit", "They keep your money — no refund!", "Unsecured credit", "Unsecured debt",
|
||
"US dollars", "Why pay more?",
|
||
)
|
||
),
|
||
|
||
"Financial - Business" : (
|
||
"",
|
||
(
|
||
"Accept credit cards", "Cards accepted", "Check or money order", "Credit card offers", "Explode your business",
|
||
"Full refund", "Investment decision", "No credit check", "No hidden Costs", "No investment",
|
||
"Requires initial investment", "Sent in compliance", "Stock alert", "Stock disclaimer statement", "Stock pick",
|
||
)
|
||
),
|
||
|
||
"Financial - Personal" : (
|
||
"",
|
||
(
|
||
"Avoice bankruptcy", "Calling creditors", "Collect child support", "Consolidate debt and credit",
|
||
"Consolidate your debt", "Eliminate bad credit", "Eliminate debt", "Financially independent",
|
||
"Get out of debt", "Get paid", "Lower interest rate", "Lower monthly payment", "Lower your mortgage rate",
|
||
"Lowest insurance rates", "Pre-approved", "Refinance home", "Social security number", "Your income",
|
||
)
|
||
),
|
||
|
||
"General" : (
|
||
"",
|
||
(
|
||
"Acceptance", "Accordingly", "Avoid", "Chance", "Dormant", "Freedom", "Here", "Hidden", "Home", "Leave",
|
||
"Lifetime", "Lose", "Maintained", "Medium", "Miracle", "Never", "Passwords", "Problem", "Remove", "Reverses",
|
||
"Sample", "Satisfaction", "Solution", "Stop", "Success", "Teen", "Wife",
|
||
)
|
||
),
|
||
|
||
"Greetings" : (
|
||
"",
|
||
(
|
||
"Dear ", "Friend", "Hello",
|
||
)
|
||
),
|
||
|
||
"Marketing" : (
|
||
"",
|
||
(
|
||
"Ad", "Auto email removal", "Bulk email", "Click", "Click below", "Click here", "Click to remove", "Direct email",
|
||
"Direct marketing", "Email harvest", "Email marketing", "Form", "Increase sales", "Increase traffic",
|
||
"Increase your sales", "Internet market", "Internet marketing", "Marketing", "Marketing solutions", "Mass email",
|
||
"Member", "Month trial offer", "More Internet Traffic", "Multi level marketing", "Notspam", "One time mailing",
|
||
"Online marketing", "Open", "Opt in", "Performance", "Removal instructions", "Sale", "Sales",
|
||
"Search engine listings", "Search engines", "Subscribe", "The following form", "This isn't junk", "This isn't spam",
|
||
"Undisclosed recipient", "Unsubscribe", "Visit our website", "We hate spam", "Web traffic", "Will not believe your eyes",
|
||
)
|
||
),
|
||
|
||
"Medical" : (
|
||
"",
|
||
(
|
||
"Cures baldness", "Diagnostic", "Fast Viagra delivery", "Human growth hormone", "Life insurance",
|
||
"Lose weight", "Lose weight spam", "Medicine", "No medical exams", "Online pharmacy", "Removes wrinkles",
|
||
"Reverses aging", "Stop snoring", "Valium", "Viagra", "Vicodin", "Weight loss", "Xanax",
|
||
)
|
||
),
|
||
|
||
"Numbers" : (
|
||
"",
|
||
(
|
||
"#1", "100% free", "100% satisfied", "4U", "50% off", "Billion", "Billion dollars", "Join millions",
|
||
"Join millions of Americans", "Million", "One hundred percent guaranteed", "Thousands",
|
||
)
|
||
),
|
||
|
||
"Offers" : (
|
||
"",
|
||
(
|
||
"Being a member", "Billing address", "Call", "Cannot be combined with any other offer",
|
||
"Confidentially on all orders", "Deal", "Financial freedom", "Gift certificate", "Giving away",
|
||
"Guarantee", "Have you been turned down?", "If only it were that easy", "Important information regarding",
|
||
"In accordance with laws", "Long distance phone offer", "Mail in order form", "Message contains",
|
||
"Name brand", "Nigerian", "No age restrictions", "No catch", "No claim forms", "No disappointment",
|
||
"No experience", "No gimmick", "No inventory", "No middleman", "No obligation", "No purchase necessary",
|
||
"No questions asked", "No selling", "No strings attached", "No-obligation", "Not intended",
|
||
"Obligation", "Off shore", "Offer", "Per day", "Per week", "Priority mail", "Prize", "Prizes",
|
||
"Produced and sent out", "Reserves the right", "Shopping spree", "Stuff on sale", "Terms and conditions",
|
||
"The best rates", "They’re just giving it away", "Trial", "Unlimited", "Unsolicited", "Vacation",
|
||
"Vacation offers", "Warranty", "We honor all", "Weekend getaway", "What are you waiting for?", "Who really wins?",
|
||
"Win", "Winner", "Winning", "Won", "You are a winner!", "You have been selected", "You’re a Winner!",
|
||
)
|
||
),
|
||
|
||
"Calls-to-Action" : (
|
||
"",
|
||
(
|
||
"Cancel at any time", "Compare", "Copy accurately", "Get", "Give it away", "Print form signature",
|
||
"Print out and fax", "See for yourself", "Sign up free today",
|
||
)
|
||
),
|
||
|
||
"Free" : (
|
||
"",
|
||
(
|
||
"Free", "Free access", "Free cell phone", "Free consultation", "Free DVD", "Free gift", "Free grant money",
|
||
"Free hosting", "Free installation", "Free Instant", "Free investment", "Free leads", "Free membership",
|
||
"Free money", "Free offer", "Free preview", "Free priority mail", "Free quote", "Free sample",
|
||
"Free trial", "Free website",
|
||
)
|
||
),
|
||
|
||
"Descriptions/Adjectives" : (
|
||
"",
|
||
(
|
||
"All natural", "All new", "Amazing", "Certified", "Congratulations", "Drastically reduced", "Fantastic deal",
|
||
"For free", "Guaranteed", "It’s effective", "Outstanding values", "Promise you", "Real thing",
|
||
"Risk free", "Satisfaction guaranteed",
|
||
)
|
||
),
|
||
|
||
"Sense of Urgency" : (
|
||
"",
|
||
(
|
||
"Access", "Act now!", "Apply now", "Apply online", "Call free", "Call now", "Can't live without", "Do it today",
|
||
"Don't delete", "Don't hesitate", "For instant access", "For Only", "For you", "Get it now", "Get started now",
|
||
"Great offer", "Info you requested", "Information you requested", "Instant", "Limited time", "New customers only",
|
||
"Now", "Now only", "Offer expires", "Once in lifetime", "One time", "Only", "Order now", "Order today",
|
||
"Please read", "Special promotion", "Supplies are limited", "Take action now", "Time limited", "Urgent",
|
||
"While supplies last",
|
||
)
|
||
),
|
||
|
||
"Nouns" : (
|
||
"",
|
||
(
|
||
"Addresses on CD", "Beverage", "Bonus", "Brand new pager", "Cable converter", "Casino", "Celebrity",
|
||
"Copy DVDs", "Laser printer", "Legal", "Luxury car", "New domain extensions", "Phone", "Rolex", "Stainless steel"
|
||
)
|
||
)
|
||
}
|
||
|
||
def __init__(self, options):
    """Keep the caller-supplied options and start with an empty result table.

    :param options: settings object stored as-is on the instance.
    """
    self.results = {}
    self.options = options
|
||
|
||
def parse(self, html):
    """Run every check against *html* and return the ones that reported something.

    :param html: HTML source of the message, as text.
    :returns: dict mapping a check title to that check's result; checks
        whose result is empty/falsy are left out.
    """
    self.html = html
    self.soup = BeautifulSoup(html, features="lxml")

    # (title, check) pairs, executed in this order.
    checks = (
        ('Embedded Images', self.testEmbeddedImages),
        ('Images without ALT', self.testImagesNoAlt),
        ('Masqueraded Links', self.testMaskedLinks),
        ('Use of underline tag <u>', self.testUnderlineTag),
        ('HTML code in <a> link tags', self.testLinksWithHtmlCode),
        ('<a href="..."> URL contained GET parameter', self.testLinksWithGETParams),
        ('<a href="..."> URL contained GET parameter with URL', self.testLinksWithGETParamsBeingURLs),
        ('<a href="..."> URL pointed to an executable file', self.testLinksWithDangerousExtensions),
        ('Mail message contained suspicious words', self.testSuspiciousWords),
    )

    for title, check in checks:
        self.results[title] = check()

    return {title: outcome for title, outcome in self.results.items() if outcome}
|
||
|
||
@staticmethod
|
||
def context(tag):
|
||
s = str(tag)
|
||
|
||
if len(s) < 100:
|
||
return s
|
||
|
||
beg = s[:50]
|
||
end = s[-50:]
|
||
|
||
return f'{beg}...{end}'
|
||
|
||
def testUnderlineTag(self):
|
||
links = self.soup('u')
|
||
|
||
if not links or len(links) == 0:
|
||
return []
|
||
|
||
desc = 'Underline tags are recognized by anti-spam filters and trigger additional rule (Office365: 67856001), but by their own shouldnt impact spam score.'
|
||
result = f'- Found {len(links)} <u> tags. This is not by itself an indication of spam, but is known to trigger some rules (like Office365: 67856001)\n'
|
||
|
||
context = ''
|
||
for i in range(len(links)):
|
||
context += str(links[i]) + '\n\n'
|
||
if i > 5: break
|
||
|
||
return {
|
||
'description' : desc,
|
||
'context' : context,
|
||
'analysis' : result
|
||
}
|
||
|
||
def testSuspiciousWords(self):
    """Look for known spam-trigger phrases in the raw message source.

    Every phrase of ``PhishingMailParser.Suspicious_Words`` is searched
    case-insensitively in ``self.html``. A phrase already reported for an
    earlier category is not checked again.

    :returns: ``{}`` when nothing matched, otherwise a dict with
        'description', 'context' (one highlighted snippet per phrase) and
        'analysis' (per-category listing plus totals).
    """
    desc = (
        '\n'
        '\n'
        'Input text message contained words considered as suspicious in context of E-Mails.\n'
        'Therefore you will have better chances of delivering your phishing e-mail when you get rid of them.\n'
        '\n'
    )
    context = ''
    result = ''

    text = self.html
    radius = 50             # characters of context kept around the match start
    foundWords = set()
    totalChecked = 0
    totalFound = 0

    for title, (summary, words) in PhishingMailParser.Suspicious_Words.items():
        # A list keeps the report in table order; duplicates cannot occur
        # because foundWords is consulted first.
        found = []

        for word in words:
            needle = word.lower()
            if needle in foundWords:
                continue

            totalChecked += 1

            # \b only makes sense next to a word character. For phrases
            # such as "$$$" or "Act now!" an unconditional \b would demand
            # a word character right outside the phrase.
            head = r'\b' if re.match(r'\w', word[:1]) else ''
            tail = r'\b' if re.match(r'\w', word[-1:]) else ''
            hit = re.search(head + re.escape(word) + tail, text, re.I)
            if not hit:
                continue

            found.append(needle)
            foundWords.add(needle)

            # Build the snippet from the actual match position, so the
            # highlighted text is exactly what the regex matched.
            start, end = hit.start(), hit.end()
            before = text[max(0, start - radius):start]
            after = text[end:max(end, start + radius)]

            line = before + logger.colored(hit.group(0), "red") + after
            line = line.replace('\n', '')
            line = re.sub(r' {2,}', ' ', line)

            context += '\n' + line + '\n'

        if found:
            totalFound += len(found)
            result += f'- Found {logger.colored(len(found), "red")} {logger.colored(title, "yellow")} words {logger.colored(summary, "cyan")}:\n'

            for w in found:
                result += f'\t- {w}\n'

            result += '\n'

    if totalFound == 0:
        return {}

    result += f'- Found in total {logger.colored(totalFound, "red")} suspicious words (out of {totalChecked} total checked).\n'

    return {
        'description' : desc,
        'context' : context,
        'analysis' : result
    }
|
||
|
||
def testLinksWithHtmlCode(self):
    """Report <a> tags whose content contains nested HTML markup.

    :returns: an empty list when no such link exists, otherwise a dict
        with 'description', 'context' (first four offenders) and
        'analysis'.
    """
    desc = 'Links that contain HTML code within <a> ... </a> may increase Spam score heavily'
    context = ''
    result = ''
    num = 0

    # Everything up to the closing </a>. DOTALL lets the content span
    # lines, and '.*' also handles an empty link body.
    closing = re.compile(r'(.*)<\s*/\s*a\s*>', re.I | re.S)

    for link in self.soup('a'):
        text = str(link)

        # Strip the opening tag, then the closing one.
        code = text[text.find('>') + 1:]
        m = closing.search(code)
        if m:
            code = m.group(1)

        # Test the inner content only: the rendered tag itself always
        # contains '<' and '>', which would flag every link.
        if not ('<' in code and '>' in code):
            continue

        num += 1

        if num < 5:
            N = 70
            tmp = text[:N]

            if len(text) > N:
                tmp += ' ... ' + text[-N:]

            context += tmp + '\n'

            code2 = PhishingMailParser.context(code)
            context += f"\n\t- {logger.colored('Code inside of <a> tag:','red')}\n\t\t" + logger.colored(code2, 'yellow') + '\n'

    if num == 0:
        return []

    result += f'- Found {num} <a> tags that contained HTML code inside!\n'
    result += '\t Links conveying HTML code within <a> ... </a> may greatly increase message Spam score!\n'

    return {
        'description' : desc,
        'context' : context,
        'analysis' : result
    }
|
||
|
||
|
||
def testLinksWithGETParams(self):
    """Report <a> tags whose href carries query-string parameters.

    :returns: an empty list when no such link exists, otherwise a dict
        with 'description', 'context' (first four offenders) and
        'analysis'.
    """
    desc = 'Links with URLs containing GET parameters will be noticed by anti-spam filters resulting in another rule triggering on message (Office365: 21615005).'
    context = ''
    result = ''
    num = 0

    for link in self.soup('a'):
        try:
            href = link['href']
        except KeyError:
            # Anchor without an href attribute.
            continue

        try:
            params = parse.parse_qsl(parse.urlsplit(href).query)
        except ValueError:
            # urlsplit() rejects some malformed URLs; nothing to inspect.
            continue

        if not params:
            continue

        num += 1

        if num < 5:
            context += PhishingMailParser.context(link) + '\n'

            hr = href[:90]
            pos = hr.find('?')
            # The '?' may lie beyond the 90-character cut; only highlight
            # when it is actually part of the shortened string.
            if pos != -1:
                hr = hr[:pos] + logger.colored(hr[pos:], 'yellow')

            context += f'\thref = "{hr}"\n'
            context += f'\ttext = "{link.getText()[:90]}"\n\n'

    if num == 0:
        return []

    result += f'- Found {num} <a> tags with href="..." URLs containing GET params.\n'
    result += '\t Links with URLs that contain GET params might trigger anti-spam rule (Office365: 21615005)\n'

    return {
        'description' : desc,
        'context' : context,
        'analysis' : result
    }
|
||
|
||
def testLinksWithDangerousExtensions(self):
    """Report <a> tags whose href path ends with an executable extension.

    The extension is compared against the module-level
    ``executable_extensions`` list.

    :returns: an empty list when no such link exists, otherwise a dict
        with 'description', 'context' (first four offenders) and
        'analysis'.
    """
    desc = 'Message contained <a> tags with href="..." links pointing to a file with dangerous extension (such as .exe)'
    context = ''
    result = ''
    num = 0

    for link in self.soup('a'):
        try:
            href = link['href']
        except KeyError:
            # Anchor without an href attribute.
            continue

        try:
            path = parse.urlsplit(href).path
        except ValueError:
            # urlsplit() rejects some malformed URLs.
            continue

        # Only the last path segment can carry the file extension.
        filename = path.rsplit('/', 1)[-1].lower()
        dot = filename.rfind('.')
        if dot == -1:
            continue

        extension = filename[dot:]
        if extension not in executable_extensions:
            continue

        num += 1

        if num < 5:
            context += PhishingMailParser.context(link) + '\n'

            hr = href[:90]
            # Look for the extension in front of any query/fragment and
            # take the last occurrence, so a host such as "www.exe.com"
            # is not highlighted by mistake.
            base = re.split(r'[?#]', hr, maxsplit=1)[0]
            pos1 = base.lower().rfind(extension)

            if pos1 == -1:
                # The extension lies beyond the 90-character cut.
                hr = logger.colored(hr, 'yellow')
            else:
                stop = pos1 + len(extension)
                hr = logger.colored(hr[:pos1], 'yellow') + logger.colored(hr[pos1:stop], 'red') + logger.colored(hr[stop:], 'yellow')

            context += f'\thref = "{hr}"\n'
            context += f'\ttext = "{link.getText()[:90]}"\n\n'

            context += f'\tExtension matched: {logger.colored(extension, "red")}\n'

    if num == 0:
        return []

    result += f'- Found {num} <a> tags with href="..." URLs pointing to files with dangerous extensions (such as .exe).\n'
    result += '\t Links with URLs that point to potentially executable files might trigger anti-spam rule (Office365: 460985005)\n'

    return {
        'description' : desc,
        'context' : context,
        'analysis' : result
    }
|
||
|
||
def testLinksWithGETParamsBeingURLs(self):
    """Report <a> tags whose href has a query parameter that is itself a URL.

    Each offending link is counted once, however many of its parameters
    look like URLs, so the count matches the "<a> tags" wording of the
    report.

    :returns: an empty list when no such link exists, otherwise a dict
        with 'description', 'context' (first four offenders) and
        'analysis'.
    """
    desc = 'Links with URLs that contain GET parameters pointing to another URL, will trigger two Office365 anti-spam rules (Office365: 45080400002).'
    context = ''
    result = ''
    num = 0

    # Compiled once instead of once per link.
    url = re.compile(r'((http|https)\:\/\/)?[a-zA-Z0-9\.\/\?\:@\-_=#]+\.([a-zA-Z]){2,6}([a-zA-Z0-9\.\&\/\?\:@\-_=#])*')

    for link in self.soup('a'):
        try:
            href = link['href']
        except KeyError:
            # Anchor without an href attribute.
            continue

        try:
            params = dict(parse.parse_qsl(parse.urlsplit(href).query))
        except ValueError:
            # urlsplit() rejects some malformed URLs.
            continue

        nested = [value for value in params.values() if url.match(value)]
        if not nested:
            continue

        num += 1

        if num < 5:
            context += PhishingMailParser.context(link) + '\n'

            hr = logger.colored(href[:90], 'yellow')

            context += f'\thref = "{hr}"\n'
            context += f'\ttext = "{link.getText()[:90]}"\n\n'

            for value in nested:
                context += f'\thref URL GET parameter contained another URL:\n\t\t' + logger.colored(value, "red") + '\n'

    if num == 0:
        return []

    result += f'- Found {num} <a> tags with href="..." URLs containing GET params containing another URL.\n'
    result += '\t Links with URLs that contain GET params with another URL might trigger anti-spam rule (Office365: 45080400002)\n'

    return {
        'description' : desc,
        'context' : context,
        'analysis' : result
    }
|
||
|
||
|
||
def testMaskedLinks(self):
    """Report <a> tags that display one URL while pointing at another.

    A link is reported when its href looks like a URL, its visible text
    contains an http(s) URL, and the two differ (ignoring case, outer
    whitespace and a trailing slash).

    :returns: an empty list when no such link exists, otherwise a dict
        with 'description', 'context' (first four offenders) and
        'analysis'.
    """
    desc = 'Links that masquerade their href= attribute by displaying different link are considered harmful and will increase Spam score.'
    context = ''
    result = ''
    num = 0

    # Compiled once instead of once per link. The second pattern requires
    # an explicit scheme, so ordinary link text is not taken for a URL.
    url = re.compile(r'((http|https)\:\/\/)?[a-zA-Z0-9\.\/\?\:@\-_=#]+\.([a-zA-Z]){2,6}([a-zA-Z0-9\.\&\/\?\:@\-_=#])*')
    url2 = re.compile(r'((http|https)\:\/\/)[a-zA-Z0-9\.\/\?\:@\-_=#]+\.([a-zA-Z]){2,6}([a-zA-Z0-9\.\&\/\?\:@\-_=#])*')

    def normalized(value):
        return value.strip().rstrip('/').lower()

    for link in self.soup('a'):
        try:
            href = link['href']
        except KeyError:
            # Anchor without an href attribute.
            continue

        text = link.getText()

        m1 = url.match(href)
        m2 = url2.search(text)

        if not (m1 and m2):
            continue

        # Displaying the link's own target is not masquerading.
        if normalized(m2.group(0)) == normalized(href):
            continue

        num += 1

        if num < 5:
            context += PhishingMailParser.context(link) + '\n'
            context += f'\thref = "{logger.colored(href[:90],"green")}"\n'
            context += f'\ttext = "{logger.colored(text[:90],"red")}"\n\n'

    if num == 0:
        return []

    result += f'- Found {num} <a> tags that masquerade their href="" links with text!\n'
    result += '\t Links that try to hide underyling URL are harmful and will be considered as Spam!\n'

    return {
        'description' : desc,
        'context' : context,
        'analysis' : result
    }
|
||
|
||
def testImagesNoAlt(self):
|
||
images = self.soup('img')
|
||
|
||
desc = 'Images without ALT="value" attribute may increase Spam scorage.'
|
||
context = ''
|
||
result = ''
|
||
num = 0
|
||
embed = ''
|
||
|
||
for img in images:
|
||
src = img['src']
|
||
alt = ''
|
||
|
||
try:
|
||
alt = img['alt']
|
||
except:
|
||
pass
|
||
|
||
if alt == '':
|
||
num += 1
|
||
|
||
if num < 5:
|
||
context += PhishingMailParser.context(img) + '\n\n'
|
||
|
||
if num > 0:
|
||
result += f'- Found {num} <img> tags without ALT="value" attribute.\n'
|
||
result += '\t Images without alternate text set in their attribute may increase Spam score\n'
|
||
|
||
if len(result) == 0:
|
||
return []
|
||
|
||
return {
|
||
'description' : desc,
|
||
'context' : context,
|
||
'analysis' : result
|
||
}
|
||
|
||
def testEmbeddedImages(self):
    """Report <img> tags whose src is an inline ``data:image/...`` URI.

    :returns: an empty list when no embedded image exists, otherwise a
        dict with 'description', 'context' (first four offenders) and
        'analysis'.
    """
    desc = 'Embedded images can increase Spam Confidence Level (SCL) in Office365 by 4 points. Embedded images are those with <img src="data:image/png;base64,<BLOB>"/> . They should be avoided.'
    context = ''
    result = ''
    num = 0
    embed = ''      # beginning of the first embedded src, quoted in the analysis

    for img in self.soup('img'):
        # An <img> without src simply cannot be an embedded image.
        src = img.get('src') or ''
        alt = img.get('alt') or ''

        if not src.lower().startswith('data:image/'):
            continue

        if not embed:
            embed = src[:30]

        num += 1

        if num >= 5:
            continue

        if alt:
            context += f'- ALT="{alt}": ' + PhishingMailParser.context(img) + '\n'
            continue

        ctx = PhishingMailParser.context(img)
        pos = ctx.find('data:')
        pos2 = ctx.find('"', pos + 1) if pos != -1 else -1

        # context() shortens long tags, so either marker may be absent
        # from the preview; never slice with a -1 index.
        if pos == -1:
            ctx = logger.colored(ctx, 'yellow')
        elif pos2 == -1:
            ctx = logger.colored(ctx[:pos], 'yellow') + logger.colored(ctx[pos:], 'red')
        else:
            ctx = logger.colored(ctx[:pos], 'yellow') + logger.colored(ctx[pos:pos2], 'red') + logger.colored(ctx[pos2:], 'yellow')

        context += ctx + '\n'

    if num == 0:
        return []

    result += f'- Found {num} <img> tags with embedded image ({embed}).\n'
    result += '\t Embedded images increase Office365 SCL (Spam) level by 4 points!\n'

    return {
        'description' : desc,
        'context' : context,
        'analysis' : result
    }
|
||
|
||
|
||
def printOutput(out):
    """Print the findings in the format selected by ``options['format']``.

    :param out: dict mapping a test title to a dict with 'description',
        'context' and 'analysis' strings.

    'text' prints one numbered section per finding; 'json' dumps *out*
    as a single JSON document. Any other format value prints nothing.
    """
    if options['format'] == 'text':
        # Assigned but not used below; the wrap width is the literal 80.
        width = 100
        num = 0

        for k, v in out.items():
            num += 1
            analysis = v['analysis'].strip()
            context = v['context'].strip()
            # Re-flow the description into lines of at most 80 columns.
            desc = '\n'.join(textwrap.wrap(
                v['description'],
                width = 80,
                initial_indent = '',
                subsequent_indent = ' '
            )).strip()

            # Push every "- " bullet of the analysis one tab to the right.
            analysis = analysis.replace('- ', '\t- ')

            print(f'''
------------------------------------------
({num}) Test: {logger.colored(k, "cyan")}

{logger.colored("DESCRIPTION", "blue")}:

{desc}

{logger.colored("CONTEXT", "blue")}:

{context}

{logger.colored("ANALYSIS", "blue")}:

{analysis}
''')

    elif options['format'] == 'json':
        print(json.dumps(out))
|
||
|
||
def opts(argv):
    """Parse the command line and merge the result into the global options.

    :param argv: full argument vector including the program name (as in
        ``sys.argv``). When empty or None, argparse falls back to
        ``sys.argv`` itself.
    :returns: the ``argparse.Namespace`` with the parsed arguments.
    """
    global options

    o = argparse.ArgumentParser(
        usage = 'phishing-HTML-linter.py [options] <file.html>'
    )

    req = o.add_argument_group('Required arguments')
    req.add_argument('file', help = 'Input HTML file')

    # Honour the vector handed in by the caller instead of silently
    # re-reading sys.argv; element 0 is the program name.
    args = o.parse_args(argv[1:] if argv else None)
    options.update(vars(args))
    return args
|
||
|
||
def main(argv):
    """Program entry point: lint the HTML file named on the command line.

    :param argv: full argument vector including the program name.
    :returns: False when argument parsing yields nothing, otherwise None.
    """
    args = opts(argv)
    if not args:
        return False

    print('''
:: Phishing HTML Linter
Shows you bad smells in your HTML code that will get your mails busted!
Mariusz Banach / mgeeky
''')

    with open(args.file, 'rb') as f:
        raw = f.read()

    # Mail templates are not always valid UTF-8. Undecodable bytes are
    # replaced rather than aborting the run with a UnicodeDecodeError.
    html = raw.decode('utf-8', errors='replace')

    p = PhishingMailParser({})
    ret = p.parse(html)

    if len(ret) > 0:
        printOutput(ret)

    else:
        print('\n[+] Congrats! Your message does not have any known bad smells that could trigger anti-spam rules.\n')
|
||
|
||
|
||
# Run the linter only when the file is executed as a script, not on import.
if __name__ == '__main__':
    main(sys.argv)
|