2021-10-17 15:22:05 +02:00
#!/usr/bin/python3
import os , sys , re
import string
import argparse
import yaml
2021-10-28 21:12:23 +02:00
import textwrap
2021-10-17 15:22:05 +02:00
import json
2021-10-29 03:22:54 +02:00
from urllib import parse
2021-10-17 15:22:05 +02:00
from bs4 import BeautifulSoup
# File extensions (lower-case, leading dot) that mark an <a href="..."> target
# as a potentially executable payload.
# NOTE(review): '.wsl' is unusual; '.wsf' / '.wsh' may have been intended - confirm.
executable_extensions = [
    '.exe',
    '.dll',
    '.lnk',
    '.scr',
    '.sys',
    '.ps1',
    '.bat',
    '.js',
    '.jse',
    '.vbs',
    '.vba',
    '.vbe',
    '.wsl',
    '.cpl',
]

# Global runtime configuration read by the logger and by printOutput().
# (An earlier, shorter `options` dict that was immediately overwritten by this
# one has been dropped; the resulting module state is the same.)
options = {
    'debug': False,
    'verbose': False,
    'nocolor': False,
    'log': sys.stderr,
    'format': 'text',
}
class Logger:
    """Small colourised logger that writes to a stream or appends to a file.

    The ``log`` option is either a writable file-like object, a path string
    (messages are appended to that file without colours) or the string
    ``'none'`` (output is discarded).
    """

    # Colour name -> ANSI SGR code used by with_color().
    # NOTE(review): 38 is not a plain colour code in SGR (it introduces an
    # extended colour); 90 is the usual "grey". Left unchanged - confirm.
    colors_map = {
        'red': 31,
        'green': 32,
        'yellow': 33,
        'blue': 34,
        'magenta': 35,
        'cyan': 36,
        'white': 37,
        'grey': 38,
    }

    # Log mode -> colour code applied to the message text.
    colors_dict = {
        'error': colors_map['red'],
        'trace': colors_map['magenta'],
        'info': colors_map['green'],
        'debug': colors_map['grey'],
        'other': colors_map['grey'],
    }

    # Defaults every instance starts from; never mutated by instances.
    options = {
        'debug': False,
        'verbose': False,
        'nocolor': False,
        'log': sys.stderr,
    }

    def __init__(self, opts=None):
        """Create a logger with its own copy of the defaults overlaid by *opts*."""
        # Copy so that configuring one logger does not leak into the class
        # attribute (and therefore into every other instance).
        self.options = dict(Logger.options)
        if opts:
            self.options.update(opts)

    @staticmethod
    def with_color(c, s):
        """Wrap *s* in the ANSI escape sequence for numeric colour code *c*."""
        return "\x1b[%dm%s\x1b[0m" % (c, s)

    def colored(self, txt, col):
        """Return *txt* coloured with the named colour *col*, or unchanged when
        the ``nocolor`` option is set."""
        if self.options['nocolor']:
            return txt
        return Logger.with_color(Logger.colors_map[col], txt)

    # Invocation:
    #   def out(txt, mode='info ', fd=None, color=None, noprefix=False, newline=True):
    @staticmethod
    def out(txt, fd, mode='info', **kwargs):
        """Write one message to *fd*.

        Args:
            txt: Message; non-strings are converted with ``str()``. ``None``
                is silently ignored.
            fd: Writable object, a file path to append to, or ``'none'``.
            mode: Log level name; used for the ``[MODE] `` prefix and to pick
                the default colour.
            **kwargs: ``color`` (name or code), ``noprefix``, ``newline``,
                ``nocolor``, ``force_stdout``.

        Raises:
            Exception: If *fd* is ``None``.
        """
        if txt is None or fd == 'none':
            return
        if fd is None:
            raise Exception('[ERROR] Logging descriptor has not been specified!')

        args = {
            'color': None,
            'noprefix': False,
            'newline': True,
            'nocolor': False,
        }
        args.update(kwargs)

        if not isinstance(txt, str):
            txt = str(txt)
        txt = txt.replace('\t', ' ' * 4)

        # Read-only lookup: an unknown mode must not add entries to colors_dict.
        default_col = Logger.colors_dict.get(mode, Logger.colors_map['grey'])
        col = default_col
        if not args['nocolor'] and args['color']:
            col = args['color']
            if isinstance(col, str):
                # Unknown colour names fall back to the mode's default colour.
                col = Logger.colors_map.get(col, default_col)

        tag = ('[%s] ' % mode).upper() if mode else ''

        prefix = ''
        # An empty mode has no tag, so there is nothing to prefix (or colour).
        if tag and not args['noprefix']:
            if args['nocolor']:
                prefix = tag
            else:
                prefix = Logger.with_color(Logger.colors_dict['other'], tag)

        nl = '\n' if args.get('newline') else ''

        # Mere presence of the key redirects output, whatever its value.
        if 'force_stdout' in args:
            fd = sys.stdout

        if isinstance(fd, str):
            # File targets always get the plain tag and no colour codes.
            with open(fd, 'a') as f:
                f.write(tag + txt + nl)
                f.flush()
        elif args['nocolor']:
            fd.write(prefix + txt + nl)
        else:
            fd.write(prefix + Logger.with_color(col, txt) + nl)

    # Info shall be used as an ordinary logging facility, for every desired output.
    def info(self, txt, forced=False, **kwargs):
        """Log *txt* at info level when forced, verbose/debug is on, or the
        log target is a file path."""
        kwargs['nocolor'] = self.options['nocolor']
        target = self.options['log']
        logs_to_file = isinstance(target, str) and target != 'none'
        if forced or self.options['verbose'] or self.options['debug'] or logs_to_file:
            Logger.out(txt, target, 'info', **kwargs)

    def text(self, txt, **kwargs):
        """Log *txt* with no level prefix."""
        kwargs['noprefix'] = True
        kwargs['nocolor'] = self.options['nocolor']
        Logger.out(txt, self.options['log'], '', **kwargs)

    def dbg(self, txt, **kwargs):
        """Log *txt* at debug level; a no-op unless the ``debug`` option is set."""
        if self.options['debug']:
            kwargs['nocolor'] = self.options['nocolor']
            Logger.out(txt, self.options['log'], 'debug', **kwargs)

    def err(self, txt, **kwargs):
        """Log *txt* at error level."""
        kwargs['nocolor'] = self.options['nocolor']
        Logger.out(txt, self.options['log'], 'error', **kwargs)

    def fatal(self, txt, **kwargs):
        """Log *txt* at error level, then terminate the process with status 1."""
        kwargs['nocolor'] = self.options['nocolor']
        Logger.out(txt, self.options['log'], 'error', **kwargs)
        # os._exit() ends the process immediately, without normal interpreter
        # shutdown.
        os._exit(1)
# Module-wide logger instance, configured from the global `options` dict.
logger = Logger ( options )
2021-10-17 15:22:05 +02:00
class PhishingMailParser :
2021-11-02 17:05:32 +01:00
#
# Based on:
# https://journeys.autopilotapp.com/blog/email-spam-trigger-words/
# https://www.activecampaign.com/blog/spam-words
# https://blog.hubspot.com/blog/tabid/6307/bid/30684/the-ultimate-list-of-email-spam-trigger-words.aspx
#
Suspicious_Words = {
' Manipulative ' : (
' creating unnecessary urgency or pressure ' ,
(
" Act now " ,
" Action " ,
" Apply now " ,
" Apply online " ,
" Buy " ,
" Buy direct " ,
" Call " ,
" Call now " ,
" Click here " ,
" Clearance " ,
" Click here " ,
" Do it today " ,
" Don ' t delete " ,
" Drastically reduced " ,
" Exclusive deal " ,
" Expire " ,
" Get " ,
" Get it now " ,
" Get started now " ,
" Important information regarding " ,
" Instant " ,
" Limited time " ,
" New customers only " ,
" Now only " ,
" Offer expires " ,
" Once in a lifetime " ,
" Order now " ,
" Please read " ,
" Special promotion " ,
" Take action " ,
" This won ' t last " ,
" Urgent " ,
" While stocks last "
)
) ,
' Needy ' : (
' sounding desperate or exaggerated claims ' ,
(
" All-new " ,
" Bargain " ,
" Best price " ,
" Bonus " ,
" Email marketing " ,
" Free " ,
" For instant access " ,
" Free gift " ,
" Free trial " ,
" Have you been turned down? " ,
" Great offer " ,
" Join millions of Americans " ,
" Incredible deal " ,
" Prize " ,
" Satisfaction guaranteed " ,
" Will not believe your eyes "
)
) ,
' Sleazy ' : (
' being too pushy ' ,
(
" As seen on " ,
" Click here " ,
" Click below " ,
" Deal " ,
" Direct email " ,
" Direct marketing " ,
" Do it today " ,
" Order now " ,
" Order today " ,
" Unlimited " ,
" What are you waiting for? " ,
" Visit our website "
)
) ,
' Cheap ' : (
' no pre-qualifications, everybody wins ' ,
(
" Acceptance " ,
" Access " ,
" Avoid bankruptcy " ,
" Boss " ,
" Cancel " ,
" Card accepted " ,
" Certified " ,
" Cheap " ,
" Compare " ,
" Compare rates " ,
" Congratulations " ,
" Credit card offers " ,
" Cures " ,
" Dear " ,
" Dear friend " ,
" Drastically reduced " ,
" Easy terms " ,
" Free grant money " ,
" Free hosting " ,
" Free info " ,
" Free membership " ,
" Friend " ,
" Get out of debt " ,
" Giving away " ,
" Guarantee " ,
" Guaranteed " ,
" Have you been turned down? " ,
" Hello " ,
" Information you requested " ,
" Join millions " ,
" No age restrictions " ,
" No catch " ,
" No experience " ,
" No obligation " ,
" No purchase necessary " ,
" No questions asked " ,
" No strings attached " ,
" Offer " ,
" Opportunity " ,
" Save big " ,
" Winner " ,
" Winning " ,
" Won " ,
" You are a winner! " ,
" You ' ve been selected! "
)
) ,
' Far-fetched ' : (
' statements that are too good to be true ' ,
(
" Additional income " ,
" All-natural " ,
" Amazing " ,
" Be your own boss " ,
" Big bucks " ,
" Billion " ,
" Billion dollars " ,
" Cash " ,
" Cash bonus " ,
" Consolidate debt and credit " ,
" Consolidate your debt " ,
" Double your income " ,
" Earn " ,
" Earn cash " ,
" Earn extra cash " ,
" Eliminate bad credit " ,
" Eliminate debt " ,
" Extra " ,
" Fantastic deal " ,
" Financial freedom " ,
" Financially independent " ,
" Free investment " ,
" Free money " ,
" Get paid " ,
" Home " ,
" Home-based " ,
" Income " ,
" Increase sales " ,
" Increase traffic " ,
" Lose " ,
" Lose weight " ,
" Money back " ,
" No catch " ,
" No fees " ,
" No hidden costs " ,
" No strings attached " ,
" Potential earnings " ,
" Pure profit " ,
" Removes wrinkles " ,
" Reverses aging " ,
" Risk-free " ,
" Serious cash " ,
" Stop snoring " ,
" Vacation " ,
" Vacation offers " ,
" Weekend getaway " ,
" Weight loss " ,
" While you sleep " ,
" Work from home "
)
) ,
' Exaggeration ' : (
' exaggerated claims and promises ' ,
(
" 100 % more " ,
" 100 % f ree " ,
" 100 % s atisfied " ,
" Additional income " ,
" Be your own boss " ,
" Best price " ,
" Big bucks " ,
" Billion " ,
" Cash bonus " ,
" Cents on the dollar " ,
" Consolidate debt " ,
" Double your cash " ,
" Double your income " ,
" Earn extra cash " ,
" Earn money " ,
" Eliminate bad credit " ,
" Extra cash " ,
" Extra income " ,
" Expect to earn " ,
" Fast cash " ,
" Financial freedom " ,
" Free access " ,
" Free consultation " ,
" Free gift " ,
" Free hosting " ,
" Free info " ,
" Free investment " ,
" Free membership " ,
" Free money " ,
" Free preview " ,
" Free quote " ,
" Free trial " ,
" Full refund " ,
" Get out of debt " ,
" Get paid " ,
" Giveaway " ,
" Guaranteed " ,
" Increase sales " ,
" Increase traffic " ,
" Incredible deal " ,
" Lower rates " ,
" Lowest price " ,
" Make money " ,
" Million dollars " ,
" Miracle " ,
" Money back " ,
" Once in a lifetime " ,
" One time " ,
" Pennies a day " ,
" Potential earnings " ,
" Prize " ,
" Promise " ,
" Pure profit " ,
" Risk-free " ,
" Satisfaction guaranteed " ,
" Save big money " ,
" Save up to " ,
" Special promotion " ,
)
) ,
' Urgency ' : (
' create unnecessary urgency and pressure ' ,
(
" Act now " ,
" Apply now " ,
" Become a member " ,
" Call now " ,
" Click below " ,
" Click here " ,
" Get it now " ,
" Do it today " ,
" Don’ t delete " ,
" Exclusive deal " ,
" Get started now " ,
" Important information regarding " ,
" Information you requested " ,
" Instant " ,
" Limited time " ,
" New customers only " ,
" Order now " ,
" Please read " ,
" See for yourself " ,
" Sign up free " ,
" Take action " ,
" This won’ t last " ,
" Urgent " ,
" What are you waiting for? " ,
" While supplies last " ,
" Will not believe your eyes " ,
" Winner " ,
" Winning " ,
" You are a winner " ,
" You have been selected " ,
)
) ,
' Spammy ' : (
' shady, spammy, or unethical behavior ' ,
(
" Bulk email " ,
" Buy direct " ,
" Cancel at any time " ,
" Check or money order " ,
" Congratulations " ,
" Confidentiality " ,
" Cures " ,
" Dear friend " ,
" Direct email " ,
" Direct marketing " ,
" Hidden charges " ,
" Human growth hormone " ,
" Internet marketing " ,
" Lose weight " ,
" Mass email " ,
" Meet singles " ,
" Multi-level marketing " ,
" No catch " ,
" No cost " ,
" No credit check " ,
" No fees " ,
" No gimmick " ,
" No hidden costs " ,
" No hidden fees " ,
" No interest " ,
" No investment " ,
" No obligation " ,
" No purchase necessary " ,
" No questions asked " ,
" No strings attached " ,
" Not junk " ,
" Notspam " ,
" Obligation " ,
" Passwords " ,
" Requires initial investment " ,
" Social security number " ,
" This isn’ t a scam " ,
" This isn’ t junk " ,
" This isn’ t spam " ,
" Undisclosed " ,
" Unsecured credit " ,
" Unsecured debt " ,
" Unsolicited " ,
" Valium " ,
" Viagra " ,
" Vicodin " ,
" We hate spam " ,
" Weight loss " ,
" Xanax " ,
)
) ,
' Jargon ' : (
' jargon or legalese ' ,
(
" Accept credit cards " ,
" All new " ,
" As seen on " ,
" Bargain " ,
" Beneficiary " ,
" Billing " ,
" Bonus " ,
" Cards accepted " ,
" Cash " ,
" Certified " ,
" Cheap " ,
" Claims " ,
" Clearance " ,
" Compare rates " ,
" Credit card offers " ,
" Deal " ,
" Debt " ,
" Discount " ,
" Fantastic " ,
" In accordance with laws " ,
" Income " ,
" Investment " ,
" Join millions " ,
" Lifetime " ,
" Loans " ,
" Luxury " ,
" Marketing solution " ,
" Message contains " ,
" Mortgage rates " ,
" Name brand " ,
" Offer " ,
" Online marketing " ,
" Opt in " ,
" Pre-approved " ,
" Quote " ,
" Rates " ,
" Refinance " ,
" Removal " ,
" Reserves the right " ,
" Score " ,
" Search engine " ,
" Sent in compliance " ,
" Subject to " ,
" Terms and conditions " ,
" Trial " ,
" Unlimited " ,
" Warranty " ,
" Web traffic " ,
" Work from home " ,
)
) ,
' Shady ' : (
' ethically or legally questionable behavior ' ,
(
" Addresses " ,
" Beneficiary " ,
" Billing " ,
" Casino " ,
" Celebrity " ,
" Collect child support " ,
" Copy DVDs " ,
" Fast viagra delivery " ,
" Hidden " ,
" Human growth hormone " ,
" In accordance with laws " ,
" Investment " ,
" Junk " ,
" Legal " ,
" Life insurance " ,
" Loan " ,
" Lottery " ,
" Luxury car " ,
" Medicine " ,
" Meet singles " ,
" Message contains " ,
" Miracle " ,
" Money " ,
" Multi-level marketing " ,
" Nigerian " ,
" Offshore " ,
" Online degree " ,
" Online pharmacy " ,
" Passwords " ,
" Refinance " ,
" Request " ,
" Rolex " ,
" Score " ,
" Social security number " ,
" Spam " ,
" This isn ' t spam " ,
" Undisclosed recipient " ,
" University diplomas " ,
" Unsecured credit " ,
" Unsolicited " ,
" US dollars " ,
" Valium " ,
" Viagra " ,
" Vicodin " ,
" Warranty " ,
" Xanax "
)
) ,
" Commerce " : (
" " ,
(
" As seen on " ,
" Buy " ,
" Buy direct " ,
" Buying judgments " ,
" Clearance " ,
" Order " ,
" Order status " ,
" Orders shipped by shopper " ,
)
) ,
" Personal " : (
" " ,
(
" Dig up dirt on friends " ,
" Meet singles " ,
" Score with babes " ,
" XXX " ,
" Near you " ,
)
) ,
" Employment " : (
" " ,
(
" Additional income " ,
" Be your own boss " ,
" Compete for your business " ,
" Double your " ,
" Earn $ " ,
" Earn extra cash " ,
" Earn per week " ,
" Expect to earn " ,
" Extra income " ,
" Home based " ,
" Home employment " ,
" Homebased business " ,
" Income from home " ,
" Make $ " ,
" Make money " ,
" Money making " ,
" Online biz opportunity " ,
" Online degree " ,
" Opportunity " ,
" Potential earnings " ,
" University diplomas " ,
" While you sleep " ,
" Work at home " ,
" Work from home " ,
)
) ,
" Financial - General " : (
" " ,
(
" $$$ " ,
" Affordable " ,
" Bargain " ,
" Beneficiary " ,
" Best price " ,
" Big bucks " ,
" Cash " ,
" Cash bonus " ,
" Cashcashcash " ,
" Cents on the dollar " ,
" Cheap " ,
" Check " ,
" Claims " ,
" Collect " ,
" Compare rates " ,
" Cost " ,
" Credit " ,
" Credit bureaus " ,
" Discount " ,
" Earn " ,
" Easy terms " ,
" F r e e " ,
" Fast cash " ,
" For just $XXX " ,
" Hidden assets " ,
" hidden charges " ,
" Income " ,
" Incredible deal " ,
" Insurance " ,
" Investment " ,
" Loans " ,
" Lowest price " ,
" Million dollars " ,
" Money " ,
" Money back " ,
" Mortgage " ,
" Mortgage rates " ,
" No cost " ,
" No fees " ,
" One hundred percent free " ,
" Only $ " ,
" Pennies a day " ,
" Price " ,
" Profits " ,
" Pure profit " ,
" Quote " ,
" Refinance " ,
" Save $ " ,
" Save big money " ,
" Save up to " ,
" Serious cash " ,
" Subject to credit " ,
" They keep your money — no refund! " ,
" Unsecured credit " ,
" Unsecured debt " ,
" US dollars " ,
" Why pay more? " ,
)
) ,
" Financial - Business " : (
" " ,
(
" Accept credit cards " ,
" Cards accepted " ,
" Check or money order " ,
" Credit card offers " ,
" Explode your business " ,
" Full refund " ,
" Investment decision " ,
" No credit check " ,
" No hidden Costs " ,
" No investment " ,
" Requires initial investment " ,
" Sent in compliance " ,
" Stock alert " ,
" Stock disclaimer statement " ,
" Stock pick " ,
)
) ,
" Financial - Personal " : (
" " ,
(
" Avoice bankruptcy " ,
" Calling creditors " ,
" Collect child support " ,
" Consolidate debt and credit " ,
" Consolidate your debt " ,
" Eliminate bad credit " ,
" Eliminate debt " ,
" Financially independent " ,
" Get out of debt " ,
" Get paid " ,
" Lower interest rate " ,
" Lower monthly payment " ,
" Lower your mortgage rate " ,
" Lowest insurance rates " ,
" Pre-approved " ,
" Refinance home " ,
" Social security number " ,
" Your income " ,
)
) ,
" General " : (
" " ,
(
" Acceptance " ,
" Accordingly " ,
" Avoid " ,
" Chance " ,
" Dormant " ,
" Freedom " ,
" Here " ,
" Hidden " ,
" Home " ,
" Leave " ,
" Lifetime " ,
" Lose " ,
" Maintained " ,
" Medium " ,
" Miracle " ,
" Never " ,
" Passwords " ,
" Problem " ,
" Remove " ,
" Reverses " ,
" Sample " ,
" Satisfaction " ,
" Solution " ,
" Stop " ,
" Success " ,
" Teen " ,
" Wife " ,
)
) ,
" Greetings " : (
" " ,
(
" Dear " ,
" Friend " ,
" Hello " ,
)
) ,
" Marketing " : (
" " ,
(
" Ad " ,
" Auto email removal " ,
" Bulk email " ,
" Click " ,
" Click below " ,
" Click here " ,
" Click to remove " ,
" Direct email " ,
" Direct marketing " ,
" Email harvest " ,
" Email marketing " ,
" Form " ,
" Increase sales " ,
" Increase traffic " ,
" Increase your sales " ,
" Internet market " ,
" Internet marketing " ,
" Marketing " ,
" Marketing solutions " ,
" Mass email " ,
" Member " ,
" Month trial offer " ,
" More Internet Traffic " ,
" Multi level marketing " ,
" Notspam " ,
" One time mailing " ,
" Online marketing " ,
" Open " ,
" Opt in " ,
" Performance " ,
" Removal instructions " ,
" Sale " ,
" Sales " ,
" Search engine listings " ,
" Search engines " ,
" Subscribe " ,
" The following form " ,
" This isn ' t junk " ,
" This isn ' t spam " ,
" Undisclosed recipient " ,
" Unsubscribe " ,
" Visit our website " ,
" We hate spam " ,
" Web traffic " ,
" Will not believe your eyes " ,
)
) ,
" Medical " : (
" " ,
(
" Cures baldness " ,
" Diagnostic " ,
" Fast Viagra delivery " ,
" Human growth hormone " ,
" Life insurance " ,
" Lose weight " ,
" Lose weight spam " ,
" Medicine " ,
" No medical exams " ,
" Online pharmacy " ,
" Removes wrinkles " ,
" Reverses aging " ,
" Stop snoring " ,
" Valium " ,
" Viagra " ,
" Vicodin " ,
" Weight loss " ,
" Xanax " ,
)
) ,
" Numbers " : (
" " ,
(
" #1 " ,
" 100 % f ree " ,
" 100 % s atisfied " ,
" 4U " ,
" 50 % o ff " ,
" Billion " ,
" Billion dollars " ,
" Join millions " ,
" Join millions of Americans " ,
" Million " ,
" One hundred percent guaranteed " ,
" Thousands " ,
)
) ,
" Offers " : (
" " ,
(
" Being a member " ,
" Billing address " ,
" Call " ,
" Cannot be combined with any other offer " ,
" Confidentially on all orders " ,
" Deal " ,
" Financial freedom " ,
" Gift certificate " ,
" Giving away " ,
" Guarantee " ,
" Have you been turned down? " ,
" If only it were that easy " ,
" Important information regarding " ,
" In accordance with laws " ,
" Long distance phone offer " ,
" Mail in order form " ,
" Message contains " ,
" Name brand " ,
" Nigerian " ,
" No age restrictions " ,
" No catch " ,
" No claim forms " ,
" No disappointment " ,
" No experience " ,
" No gimmick " ,
" No inventory " ,
" No middleman " ,
" No obligation " ,
" No purchase necessary " ,
" No questions asked " ,
" No selling " ,
" No strings attached " ,
" No-obligation " ,
" Not intended " ,
" Obligation " ,
" Off shore " ,
" Offer " ,
" Per day " ,
" Per week " ,
" Priority mail " ,
" Prize " ,
" Prizes " ,
" Produced and sent out " ,
" Reserves the right " ,
" Shopping spree " ,
" Stuff on sale " ,
" Terms and conditions " ,
" The best rates " ,
" They’ re just giving it away " ,
" Trial " ,
" Unlimited " ,
" Unsolicited " ,
" Vacation " ,
" Vacation offers " ,
" Warranty " ,
" We honor all " ,
" Weekend getaway " ,
" What are you waiting for? " ,
" Who really wins? " ,
" Win " ,
" Winner " ,
" Winning " ,
" Won " ,
" You are a winner! " ,
" You have been selected " ,
" You’ re a Winner! " ,
)
) ,
" Calls-to-Action " : (
" " ,
(
" Cancel at any time " ,
" Compare " ,
" Copy accurately " ,
" Get " ,
" Give it away " ,
" Print form signature " ,
" Print out and fax " ,
" See for yourself " ,
" Sign up free today " ,
)
) ,
" Free " : (
" " ,
(
" Free " ,
" Free access " ,
" Free cell phone " ,
" Free consultation " ,
" Free DVD " ,
" Free gift " ,
" Free grant money " ,
" Free hosting " ,
" Free installation " ,
" Free Instant " ,
" Free investment " ,
" Free leads " ,
" Free membership " ,
" Free money " ,
" Free offer " ,
" Free preview " ,
" Free priority mail " ,
" Free quote " ,
" Free sample " ,
" Free trial " ,
" Free website " ,
)
) ,
" Descriptions/Adjectives " : (
" " ,
(
" All natural " ,
" All new " ,
" Amazing " ,
" Certified " ,
" Congratulations " ,
" Drastically reduced " ,
" Fantastic deal " ,
" For free " ,
" Guaranteed " ,
" It’ s effective " ,
" Outstanding values " ,
" Promise you " ,
" Real thing " ,
" Risk free " ,
" Satisfaction guaranteed " ,
)
) ,
" Sense of Urgency " : (
" " ,
(
" Access " ,
" Act now! " ,
" Apply now " ,
" Apply online " ,
" Call free " ,
" Call now " ,
" Can ' t live without " ,
" Do it today " ,
" Don ' t delete " ,
" Don ' t hesitate " ,
" For instant access " ,
" For Only " ,
" For you " ,
" Get it now " ,
" Get started now " ,
" Great offer " ,
" Info you requested " ,
" Information you requested " ,
" Instant " ,
" Limited time " ,
" New customers only " ,
" Now " ,
" Now only " ,
" Offer expires " ,
" Once in lifetime " ,
" One time " ,
" Only " ,
" Order now " ,
" Order today " ,
" Please read " ,
" Special promotion " ,
" Supplies are limited " ,
" Take action now " ,
" Time limited " ,
" Urgent " ,
" While supplies last " ,
)
) ,
" Nouns " : (
" " ,
(
" Addresses on CD " ,
" Beverage " ,
" Bonus " ,
" Brand new pager " ,
" Cable converter " ,
" Casino " ,
" Celebrity " ,
" Copy DVDs " ,
" Laser printer " ,
" Legal " ,
" Luxury car " ,
" New domain extensions " ,
" Phone " ,
" Rolex " ,
" Stainless steel "
)
)
}
2021-10-17 15:22:05 +02:00
def __init__ ( self , options ) :
self . options = options
self . results = { }
def parse(self, html):
    """Run every check against *html* and return the non-empty findings.

    Args:
        html: The message body as a string.

    Returns:
        Dict mapping each test title to its finding, restricted to tests
        that returned something truthy.
    """
    self.html = html
    self.soup = BeautifulSoup(html, features="lxml")

    # (title, check) pairs, executed and stored in this order.
    checks = (
        ('Embedded Images', self.testEmbeddedImages),
        ('Images without ALT', self.testImagesNoAlt),
        ('Masqueraded Links', self.testMaskedLinks),
        ('Use of underline tag <u>', self.testUnderlineTag),
        ('HTML code in <a> link tags', self.testLinksWithHtmlCode),
        ('<a href="..."> URL contained GET parameter', self.testLinksWithGETParams),
        ('<a href="..."> URL contained GET parameter with URL', self.testLinksWithGETParamsBeingURLs),
        ('<a href="..."> URL pointed to an executable file', self.testLinksWithDangerousExtensions),
        ('Mail message contained suspicious words', self.testSuspiciousWords),
    )
    for title, check in checks:
        self.results[title] = check()

    return {k: v for k, v in self.results.items() if v}
@staticmethod
def context ( tag ) :
s = str ( tag )
if len ( s ) < 100 :
return s
beg = s [ : 50 ]
end = s [ - 50 : ]
return f ' { beg } ... { end } '
2021-10-28 21:12:23 +02:00
def testUnderlineTag ( self ) :
links = self . soup ( ' u ' )
if not links or len ( links ) == 0 :
return [ ]
desc = ' Underline tags are recognized by anti-spam filters and trigger additional rule (Office365: 67856001), but by their own shouldnt impact spam score. '
result = f ' - Found { len ( links ) } <u> tags. This is not by itself an indication of spam, but is known to trigger some rules (like Office365: 67856001) \n '
context = ' '
for i in range ( len ( links ) ) :
2021-10-29 03:22:54 +02:00
context + = str ( links [ i ] ) + ' \n \n '
if i > 5 : break
2021-10-28 21:12:23 +02:00
return {
' description ' : desc ,
' context ' : context ,
' analysis ' : result
}
2021-11-02 17:05:32 +01:00
def testSuspiciousWords(self):
    """Look for spam-trigger phrases from ``Suspicious_Words`` in the raw message.

    Each phrase is matched case-insensitively on word boundaries against
    ``self.html`` and is reported once, under the first category that lists it.

    Returns:
        ``{}`` when nothing matched, otherwise a dict with ``description``,
        ``context`` (a text window around each first hit, with the hit
        highlighted) and ``analysis`` (per-category word lists and a total).
    """
    desc = '''
Input text message contained words considered as suspicious in context of E-Mails.
Therefore you will have better chances of delivering your phishing e-mail when you get rid of them.
'''
    # Characters of surrounding text shown on each side of a hit's start.
    window = 50

    text = self.html
    context = ''
    result = ''
    seen = set()
    total = 0

    for title, (explanation, words) in PhishingMailParser.Suspicious_Words.items():
        found = set()

        for word in words:
            key = word.lower()
            if key in seen:
                continue

            match = re.search(r'\b' + re.escape(word) + r'\b', text, re.I)
            if not match:
                continue

            found.add(key)
            seen.add(key)

            # Build the excerpt from the regex match itself so that hits in
            # any letter case, and hits near the start of the text, still get
            # their surrounding context.
            start, end = match.span()
            line = (
                text[max(0, start - window):start]
                + logger.colored(match.group(0), "red")
                + text[end:start + window]
            )
            line = line.replace('\n', '')
            line = re.sub(r' {2,}', ' ', line)
            context += '\n' + line + '\n'

        if found:
            total += len(found)
            result += f'- Found {logger.colored(len(found), "red")} {logger.colored(title, "yellow")} words {logger.colored(explanation, "cyan")}:\n'
            # Sorted so that the report is identical from run to run.
            for w in sorted(found):
                result += f'\t- {w}\n'
            result += '\n'

    if total == 0:
        return {}

    result += f'- Found in total {logger.colored(total, "red")} suspicious words.\n'

    return {
        'description': desc,
        'context': context,
        'analysis': result
    }
2021-10-29 03:22:54 +02:00
def testLinksWithHtmlCode ( self ) :
links = self . soup ( ' a ' )
desc = ' Links that contain HTML code within <a> ... </a> may increase Spam score heavily '
context = ' '
result = ' '
num = 0
embed = ' '
for link in links :
text = str ( link )
pos = text . find ( ' > ' )
code = text [ pos + 1 : ]
m = re . search ( r ' (.+)< \ s*/ \ s*a \ s*> ' , code , re . I )
if m :
code = m . group ( 1 )
suspicious = ' < ' in text and ' > ' in text
if suspicious :
num + = 1
if num < 5 :
N = 70
tmp = text [ : N ]
if len ( text ) > N :
tmp + = ' ... ' + text [ - N : ]
context + = tmp + ' \n '
code2 = PhishingMailParser . context ( code )
context + = f " \n \t - { logger . colored ( ' Code inside of <a> tag: ' , ' red ' ) } \n \t \t " + logger . colored ( code2 , ' yellow ' ) + ' \n '
if num > 0 :
result + = f ' - Found { num } <a> tags that contained HTML code inside! \n '
result + = ' \t Links conveying HTML code within <a> ... </a> may greatly increase message Spam score! \n '
if len ( result ) == 0 :
return [ ]
return {
' description ' : desc ,
' context ' : context ,
' analysis ' : result
}
def testLinksWithGETParams ( self ) :
links = self . soup ( ' a ' )
desc = ' Links with URLs containing GET parameters will be noticed by anti-spam filters resulting in another rule triggering on message (Office365: 21615005). '
context = ' '
result = ' '
num = 0
embed = ' '
for link in links :
try :
href = link [ ' href ' ]
except :
continue
text = link . getText ( )
params = dict ( parse . parse_qsl ( parse . urlsplit ( href ) . query ) )
if len ( params ) > 0 :
num + = 1
if num < 5 :
context + = PhishingMailParser . context ( link ) + ' \n '
hr = href [ : 90 ]
pos = hr . find ( ' ? ' )
hr = hr [ : pos ] + logger . colored ( hr [ pos : ] , ' yellow ' )
context + = f ' \t href = " { hr } " \n '
context + = f ' \t text = " { text [ : 90 ] } " \n \n '
if num > 0 :
result + = f ' - Found { num } <a> tags with href= " ... " URLs containing GET params. \n '
result + = ' \t Links with URLs that contain GET params might trigger anti-spam rule (Office365: 21615005) \n '
if len ( result ) == 0 :
return [ ]
return {
' description ' : desc ,
' context ' : context ,
' analysis ' : result
}
def testLinksWithDangerousExtensions ( self ) :
links = self . soup ( ' a ' )
desc = ' Message contained <a> tags with href= " ... " links pointing to a file with dangerous extension (such as .exe) '
context = ' '
result = ' '
num = 0
embed = ' '
for link in links :
try :
href = link [ ' href ' ]
except :
continue
text = link . getText ( )
parsed = parse . urlsplit ( href )
if ' . ' not in parsed . path :
continue
pos = parsed . path . rfind ( ' . ' )
if pos == - 1 :
continue
extension = parsed . path . lower ( ) [ pos : ]
if extension in executable_extensions :
num + = 1
if num < 5 :
context + = PhishingMailParser . context ( link ) + ' \n '
hr = href [ : 90 ]
pos1 = hr . lower ( ) . find ( extension . lower ( ) )
hr = logger . colored ( hr [ : pos1 ] , ' yellow ' ) + logger . colored ( hr [ pos1 : pos1 + len ( extension ) ] , ' red ' ) + logger . colored ( hr [ pos1 + len ( extension ) : ] , ' yellow ' )
context + = f ' \t href = " { hr } " \n '
context + = f ' \t text = " { text [ : 90 ] } " \n \n '
context + = f ' \t Extension matched: { logger . colored ( extension , " red " ) } \n '
if num > 0 :
result + = f ' - Found { num } <a> tags with href= " ... " URLs pointing to files with dangerous extensions (such as .exe). \n '
result + = ' \t Links with URLs that point to potentially executable files might trigger anti-spam rule (Office365: 460985005) \n '
if len ( result ) == 0 :
return [ ]
return {
' description ' : desc ,
' context ' : context ,
' analysis ' : result
}
def testLinksWithGETParamsBeingURLs ( self ) :
links = self . soup ( ' a ' )
desc = ' Links with URLs that contain GET parameters pointing to another URL, will trigger two Office365 anti-spam rules (Office365: 45080400002). '
context = ' '
result = ' '
num = 0
embed = ' '
for link in links :
try :
href = link [ ' href ' ]
except :
continue
text = link . getText ( )
params = dict ( parse . parse_qsl ( parse . urlsplit ( href ) . query ) )
url = re . compile ( r ' ((http|https) \ : \ / \ /)?[a-zA-Z0-9 \ . \ / \ ? \ :@ \ -_=#]+ \ .([a-zA-Z]) { 2,6}([a-zA-Z0-9 \ . \ & \ / \ ? \ :@ \ -_=#])* ' )
if len ( params ) > 0 :
for k , v in params . items ( ) :
m = url . match ( v )
if m :
urlmatched = m . group ( 1 )
num + = 1
if num < 5 :
context + = PhishingMailParser . context ( link ) + ' \n '
hr = href [ : 90 ]
hr = logger . colored ( hr , ' yellow ' )
context + = f ' \t href = " { hr } " \n '
context + = f ' \t text = " { text [ : 90 ] } " \n \n '
context + = f ' \t href URL GET parameter contained another URL: \n \t \t ' + logger . colored ( v , " red " ) + ' \n '
if num > 0 :
result + = f ' - Found { num } <a> tags with href= " ... " URLs containing GET params containing another URL. \n '
result + = ' \t Links with URLs that contain GET params with another URL might trigger anti-spam rule (Office365: 45080400002) \n '
if len ( result ) == 0 :
return [ ]
return {
' description ' : desc ,
' context ' : context ,
' analysis ' : result
}
2021-10-17 15:22:05 +02:00
def testMaskedLinks ( self ) :
links = self . soup ( ' a ' )
desc = ' Links that masquerade their href= attribute by displaying different link are considered harmful and will increase Spam score. '
context = ' '
result = ' '
num = 0
embed = ' '
for link in links :
try :
href = link [ ' href ' ]
except :
continue
text = link . getText ( )
url = re . compile ( r ' ((http|https) \ : \ / \ /)?[a-zA-Z0-9 \ . \ / \ ? \ :@ \ -_=#]+ \ .([a-zA-Z]) { 2,6}([a-zA-Z0-9 \ . \ & \ / \ ? \ :@ \ -_=#])* ' )
2021-11-02 17:05:32 +01:00
url2 = re . compile ( r ' ((http|https) \ : \ / \ /)[a-zA-Z0-9 \ . \ / \ ? \ :@ \ -_=#]+ \ .([a-zA-Z]) { 2,6}([a-zA-Z0-9 \ . \ & \ / \ ? \ :@ \ -_=#])* ' )
2021-10-17 15:22:05 +02:00
m1 = url . match ( href )
2021-11-02 17:05:32 +01:00
m2 = url2 . search ( text )
2021-10-17 15:22:05 +02:00
if m1 and m2 :
num + = 1
2021-10-29 03:22:54 +02:00
if num < 5 :
context + = PhishingMailParser . context ( link ) + ' \n '
context + = f ' \t href = " { logger . colored ( href [ : 90 ] , " green " ) } " \n '
context + = f ' \t text = " { logger . colored ( text [ : 90 ] , " red " ) } " \n \n '
2021-10-17 15:22:05 +02:00
if num > 0 :
result + = f ' - Found { num } <a> tags that masquerade their href= " " links with text! \n '
result + = ' \t Links that try to hide underyling URL are harmful and will be considered as Spam! \n '
if len ( result ) == 0 :
return [ ]
return {
' description ' : desc ,
' context ' : context ,
' analysis ' : result
}
def testImagesNoAlt ( self ) :
images = self . soup ( ' img ' )
desc = ' Images without ALT= " value " attribute may increase Spam scorage. '
context = ' '
result = ' '
num = 0
embed = ' '
for img in images :
src = img [ ' src ' ]
alt = ' '
try :
alt = img [ ' alt ' ]
except :
pass
if alt == ' ' :
num + = 1
2021-10-29 03:22:54 +02:00
if num < 5 :
context + = PhishingMailParser . context ( img ) + ' \n \n '
2021-10-17 15:22:05 +02:00
if num > 0 :
result + = f ' - Found { num } <img> tags without ALT= " value " attribute. \n '
result + = ' \t Images without alternate text set in their attribute may increase Spam score \n '
if len ( result ) == 0 :
return [ ]
return {
' description ' : desc ,
' context ' : context ,
' analysis ' : result
}
def testEmbeddedImages ( self ) :
images = self . soup ( ' img ' )
desc = ' Embedded images can increase Spam Confidence Level (SCL) in Office365 by 4 points. Embedded images are those with <img src= " data:image/png;base64,<BLOB> " /> . They should be avoided. '
context = ' '
result = ' '
num = 0
embed = ' '
for img in images :
src = img [ ' src ' ]
alt = ' '
try :
alt = img [ ' alt ' ]
except :
pass
if src . lower ( ) . startswith ( ' data:image/ ' ) :
if len ( embed ) == 0 :
embed = src [ : 30 ]
num + = 1
2021-10-29 03:22:54 +02:00
if num < 5 :
if len ( alt ) > 0 :
context + = f ' - ALT= " { alt } " : ' + PhishingMailParser . context ( img ) + ' \n '
else :
ctx = PhishingMailParser . context ( img )
pos = ctx . find ( ' data: ' )
pos2 = ctx . find ( ' " ' , pos + 1 )
ctx = logger . colored ( ctx [ : pos ] , ' yellow ' ) + logger . colored ( ctx [ pos : pos2 ] , ' red ' ) + logger . colored ( ctx [ pos2 : ] , ' yellow ' )
context + = ctx + ' \n '
2021-10-17 15:22:05 +02:00
if num > 0 :
result + = f ' - Found { num } <img> tags with embedded image ( { embed } ). \n '
result + = ' \t Embedded images increase Office365 SCL (Spam) level by 4 points! \n '
if len ( result ) == 0 :
return [ ]
return {
' description ' : desc ,
' context ' : context ,
' analysis ' : result
}
def printOutput(out):
    """Print the findings returned by ``PhishingMailParser.parse()``.

    With ``options['format'] == 'text'`` every finding is printed as a
    numbered section (description, context, analysis); with ``'json'`` the
    whole mapping is dumped as a single JSON document. Any other format
    prints nothing.

    Args:
        out: Dict mapping test title to a dict with the keys
            ``description``, ``context`` and ``analysis``.
    """
    fmt = options['format']

    if fmt == 'json':
        print(json.dumps(out))
        return

    if fmt != 'text':
        return

    for num, (k, v) in enumerate(out.items(), start=1):
        analysis = v['analysis'].strip()
        context = v['context'].strip()

        desc = '\n'.join(textwrap.wrap(
            v['description'],
            width=80,
            initial_indent='',
            subsequent_indent='    '
        )).strip()

        # Indent every bullet of the analysis by one tab.
        analysis = analysis.replace('- ', '\t- ')

        print(f'''
------------------------------------------
({num}) Test: {logger.colored(k, "cyan")}

{logger.colored("DESCRIPTION", "blue")}:

    {desc}

{logger.colored("CONTEXT", "blue")}:

    {context}

{logger.colored("ANALYSIS", "blue")}:

    {analysis}
''')
def opts(argv):
    """Parse the command line and merge the result into the global ``options``.

    Args:
        argv: Full argument vector including the program name (as in
            ``sys.argv``). When empty or ``None``, argparse falls back to
            ``sys.argv``.

    Returns:
        The ``argparse.Namespace`` with the parsed arguments (``file``).
    """
    global options

    o = argparse.ArgumentParser(
        usage='phishing-HTML-linter.py [options] <file.html>'
    )

    req = o.add_argument_group('Required arguments')
    req.add_argument('file', help='Input HTML file')

    # Honour the argv that was passed in instead of silently re-reading
    # sys.argv; argv[0] is the program name and is skipped.
    args = o.parse_args(argv[1:] if argv else None)

    options.update(vars(args))
    return args
def main(argv):
    """Script entry point: lint the HTML file named on the command line.

    Args:
        argv: Full argument vector including the program name.

    Returns:
        ``False`` when argument parsing yields nothing; otherwise ``None``
        after the report has been printed.
    """
    args = opts(argv)
    if not args:
        return False

    print('''
    :: Phishing HTML Linter
    Shows you bad smells in your HTML code that will get your mails busted!
    Mariusz Banach / mgeeky
''')

    with open(args.file, 'rb') as f:
        raw = f.read()

    # Mail bodies are not guaranteed to be valid UTF-8; substitute undecodable
    # bytes rather than aborting with UnicodeDecodeError.
    html = raw.decode('utf-8', errors='replace')

    p = PhishingMailParser({})
    ret = p.parse(html)

    if len(ret) > 0:
        printOutput(ret)
    else:
        print('\n[+] Congrats! Your message does not have any known bad smells that could trigger anti-spam rules.\n')
2021-10-17 15:22:05 +02:00
# Run the linter only when executed as a script, not when imported.
if __name__ == '__main__':
    main(sys.argv)