#!/usr/bin/python3

''' 
nextlist.py (c) Richard Kay 2023-2024
License: GNU public license version 2

This program takes an email as standard input
and distributes it to an email list, manipulating
headers and recreating the outgoing email customised
for each list member.

It is the email handing part of the Nextlist email
list management package.

'''

# Import smtplib for the actual sending function
import smtplib
import email
import copy # for email object cloning purposes
# Import the email modules we'll need
from email.message import EmailMessage
from email import parser
import sys
import os
import os.path
import rmutils
import urllib.parse
from rmutils import log # used to log events
home_dir=rmutils.data_directory
web_dir=rmutils.web_directory
sys.path.append(web_dir)
import address_textio

def main(argv):
  ''' argv is a list including listname and domain '''
  if len(argv) != 3:
    rmutils.senderror(None,"incorrect call need listname and domainname") 
  listname=argv[1]
  mydomain=argv[2] # email domain - not web domain
  # configuration variables
  cfgf=os.path.join(home_dir,listname,"list.cfg")
  mime_policy_file=os.path.join(home_dir,listname,"mimetypes.txt")
  if not os.access(cfgf, os.R_OK):
      rmutils.senderror(None,"list config file: "+cfgf+" not readable.") 
  vs=rmutils.get_vars(cfgf)
  webroot=vs['webroot'] # root path of web files
  timeout=vs['DownloadDays'] # number of days for list while downloadables are stored
  # Text footer as configured can have between 1 and 9 lines
  tf=''
  for i in range(1,10):
    keystr='Text_footer'+str(i)
    if keystr in vs.keys() :
      tf+=vs['Text_footer'+str(i)]+'\n'
  text_footer=tf
  unsub_method=vs['UnsubscribeMethod'].lower() # either easy or secure
  if unsub_method not in ['easy','secure']:
    rmutils.senderror(None,"Config error, must be easy or secure") 
  membertab=rmutils.get_list(home_dir,listname) # member address table
  members=membertab.keys()
  max_length=int(vs['MaxLength']) # max message length
  msgtextlines=sys.stdin.readlines()
  if not msgtextlines or ( type(msgtextlines) != type([])) :
    rmutils.senderror(None,"Empty message. How this this happen ?")
  msgtext=""
  if msgtextlines[0][0:4] == 'From ':
    for line in msgtextlines[1:] : # Ignore Unix From header line
      msgtext+=line
  else: 
    for line in msgtextlines : # No Unix From header line
      msgtext+=line
  try:
    headerstr,body=msgtext.split("\n\n",maxsplit=1)
  except:
    rmutils.senderror(None,"error splitting email input headers and body")
  outm=EmailMessage()
  inm=parser.Parser().parsestr(msgtext)
  payload=inm.get_payload() # should be either string (no Mime) or list (multipart)
  #
  reject1='' # reason for rejection message
  reject=''
  BodyError=False # use this if no body
  if type(payload) == type('string'):
    payl=payload
    if not payl:
      BodyError=True # use this if no body
      reject1+='Empty string payload.\n'  # reason for rejection message
  else:
    payl=''
  if not inm.is_multipart():
    multipart=False
    attached=0 # in case any code asks
  else:
    multipart=True
    payl,attached,plaintext,reject=xfer_attachments(inm,outm,mime_policy_file,listname,webroot,timeout)
    reject=reject1+reject
    if not attached:
      multipart=False # don't need multipart o/p if no attachements xferred
    if not payl :
    #if (not payl) or plaintext == 0:
      BodyError=True # use this if no body. Delay sending until we have a from
      reject+='Multipart Message with no body text\n'
    if reject: # for whatever reason 
      BodyError=True 
  # Received Chain
  rec_chain=inm.get_all('Received')
  if rec_chain: # None if doesn't have these routing headers
    for header in rec_chain:
      header=rmutils.strip_cr_lf(header) # Policy dislikes multiline headers
      outm.add_header('Received',header)
  # In-Reply-To
  if not 'Message-ID' in inm.keys():
    In_Reply_To=False
  else:
    In_Reply_To=True
    outm['In-Reply-To']=rmutils.strip_cr_lf(inm['Message-ID'])
  # Subject 
  if not 'Subject' in inm.keys(): 
    outm['Subject'] = f'[{listname}] no Subject'
  elif inm['Subject'] == "" : 
    outm['Subject'] = f'[{listname}] no Subject'
  else:
    if f'[{listname}]' in inm['Subject'].lower():
      outm['Subject']=rmutils.strip_cr_lf(inm['Subject'])
    else:  
      outm['Subject']=rmutils.strip_cr_lf(f'[{listname}] '+inm['Subject'])
  # From and Reply-To
  sendaddr=listname+'@'+mydomain
  if not 'From' in inm.keys():
    # An error case. We want to route error message somewhere
    fromname=listname
    fromaddr='postmaster'
  elif inm['From'] == "" : 
    # same kind of error case. We want to route error message
    fromname=listname
    fromaddr='postmaster'
  else: 
    # normal valid email 
    fromoriginal = address_textio.just_name(inm['From'])
    fromname = fromoriginal+' via '+listname
    fromaddr = address_textio.just_address(inm['From'])
    # reply goes by default to sender, not list. There was much
    # heated debate about this long ago, and the reply to sender 
    # default preference won by a mile. If you think this
    # wrong, fix the code yourself and suggest provide a config option.
    outm['Reply-To']=rmutils.strip_cr_lf(address_textio.make_to_address(fromaddr,fromoriginal))
  newfromstr=address_textio.make_to_address(sendaddr,fromname)
  outm['From']=rmutils.strip_cr_lf(newfromstr)
  # Cc header. Include list address so reply-to-all works correctly 
  #originalcc=''
  #if 'Cc' in inm.keys():
  # originalcc=', '+inm['Cc']
  #  originalcc=inm['Cc']
  #if sendaddr.lower() in originalcc.lower():
  #  newcc=originalcc # don't want list address for reply all in Cc twice
  #else:
  #newcc=sendaddr+', '+originalcc
  newcc=sendaddr
  newcc=rmutils.strip_cr_lf(newcc) # Policy dislikes multiline headers
  outm['Cc']=newcc
  #
  # Now we have a from likely to go to sender reject if no message body
  if BodyError:
      errmes="Message without any body plain text or containing one or more\n"
      errmes+="attachments which were not accepted.  Further information:\n\n" 
      errmes+=reject+'\n'
      rmutils.senderror(fromaddr,errmes)
  # reject if message too long
  if len(msgtext) > max_length:
      errmes="Message length of "+str(len(msgtext))+" exceeds list: "+listname+" maximum"
      errmes+=" of "+str(max_length)+".\nMessage rejected."
      rmutils.senderror(fromaddr,errmes)
  # announcement only list restricts posting
  if '@' in vs['WhoCanPost']:
    canpostfrom=address_textio.just_address(vs['WhoCanPost'])
  else:
    canpostfrom=vs['WhoCanPost'].lower()
  if canpostfrom != 'member': # announcement only list
    if canpostfrom != fromaddr:
      # trying to send to announcement only list by non announcer address     
      errmes=('Your address '+ fromaddr + 
              ' is not registered as announcement address for: ' + listname +
              ' \nYou may contact the listowner. ' +vs['Listowner'] ) 
      rmutils.senderror(fromaddr,errmes)
  elif not fromaddr in members:
      errmes=('Your address '+ fromaddr + 
              ' is not a member of list: ' + listname +
              ' to request addition you must contact listowner ' 
              +vs['Listowner'] ) 
      rmutils.senderror(fromaddr,errmes)
  # need to check if member is moderated and delay posting by moderated member
  fromindex=membertab.find(fromaddr)
  from_moderated=membertab.data[fromindex]['moderated']
  if from_moderated.upper() == 'Y':
    errmes=('Your address '+ fromaddr + 
            ' is a moderated member of list: ' + listname +
            ' . To request posting rights you must contact listowner ' 
              +vs['Listowner'] ) 
    rmutils.senderror(fromaddr,errmes)
  outm['To'] = vs['Sender'] # need to have a default before can replace
  for header in [
          'List-Id',
          'Return-Path',
          'Sender',
          'List-Unsubscribe'
          ]:
    rmutils.add_header(outm,header,vs[header])
  if vs['Visibility']=='public':
    rmutils.add_header(outm,'List-Subscribe',vs['List-Subscribe'])
  for member in members: # email to each list member in turn
    index=membertab.find(member)
    memberrow=membertab.data[index]
    cloned_outm=copy.deepcopy(outm)
    if unsub_method == 'easy': # if easy, footer has 1 click unsub method 
      # Adding footer with variable code
      Vars=[listname,vs['Listowner'],vs['List-Unsubscribe'],member,listname,memberrow['code'],'&unsub=unsub']
      rmutils.add_footer(cloned_outm,text_footer,multipart=multipart,payload=payl,Vars=Vars)
      rmutils.mail_to_recipient(cloned_outm,memberrow)
    else: # secure. Unsub link has 1 click then confirmation stage
      Vars=[listname,vs['Listowner'],vs['List-Unsubscribe'],member,listname,'CONF_RQD','&send=send']
      rmutils.add_footer(cloned_outm,text_footer,multipart=multipart,payload=payl,Vars=Vars)
      # Adding footer without variable code
      rmutils.mail_to_recipient(cloned_outm,memberrow) 
# end of main()

def get_mime_policy(mime_policy_file):
    # returns list of good mime types.
    # TBD - read this from a config file
    goodmimes=open(mime_policy_file).readlines()
    good_types=[]
    for mimetype in goodmimes:
        mimetype=rmutils.stripwhite(mimetype)
        good_types.append(mimetype)
    return good_types

def mimetype_from_file(filename):
  ''' provides security check to reject a file attachment masquerading as something else '''
  import subprocess
  if not os.path.isfile(filename):
    # should have checked it was first
    print(filename+' is not a file')
    sys.exit(1)
  mt=subprocess.getoutput('/usr/bin/file --mime-type '+filename).split(': ',maxsplit=1)[1]
  return(mt)

def xfer_attachments(inm,outm,mime_policy_file,listname,webroot,timeout):
  ''' transfers attachments from input email to temporary
  filesystem storage on Nextlist website, organised per list
  using randomised folders to store attachments relating to
  each message. These folders can be automatically pruned externally
  to this program, to ensure expiry of availability e.g. after 7 or
  14 days.

  Body text is extracted based on text/plain part, and a reference
  section showing downloadable links based on accepted attachments
  is appended to body text.

  Attachments accepted are based on a policy. A policy has 3 elements:

  1. Concerning what to do with html only (no other suitable plain text)
  2. Which attachments to treat as plain text to be inlined in the
  message body
  3. Which attachment mime types to reject and which to transfer. 
  '''
  import random # used to generate folders for saving attachments as files
  # give each email with saved attachments an obscure path part
  dir_name=str(random.randint(100000000,1000000000)) 
  refstart=webroot+'attached/' # use to build URL for reference
  att_dir=os.path.join(web_dir,'attached') # attachment directory
  if not os.path.isdir(att_dir):
    os.mkdir(att_dir)
    os.system('chmod 755 '+att_dir)
  list_dir=os.path.join(att_dir,listname) # per list attachments
  refstart+=listname+'/' # use to build URL for reference
  if not os.path.isdir(list_dir):
    os.mkdir(list_dir)
    os.system('chmod 755 '+list_dir)
  save_dir=os.path.join(list_dir,dir_name) # per list attachments 
  refstart+=dir_name+'/' # use to build URL for reference
  good_types=get_mime_policy(mime_policy_file)
  alist=[] # list of attachments
  asstr=[] # attachments as strings 
  # input attachments to alist
  plaintext=0 # count plaintext parts
  attached=0 # we don't attach everything. must have filename, no html
  refsection='\n___________________________________\n' # references section to include download links
  refsection+='Downloads available for '+timeout+' days after message date.\n'
  i=0
  for part in inm.walk() :
    if i > 0: # skip the header attachment which is first
      alist.append(part)
      asstr.append(str(part))
      # log('part'+str(i)+' '+str(part))
    i+=1
  partno=0
  body=''
  reject=''
  # process output list
  for part in alist: 
    '''
    logrec='partno :'+str(partno)
    logrec='is_attachment :'+str(partno)
    logrec+="content_charset :"+str(part.get_content_charset())  
    logrec+="content_disposition :"+str(part.get_content_disposition())
    logrec+=" content_type :"+str(part.get_content_type())
    logrec+=" filename :"+str(part.get_filename())
    logrec+=" default_type :"+str(part.get_default_type())
    log(logrec) # for debug
    '''
    filename=str(part.get_filename())
    content_type=str(part.get_content_type())
    if filename != 'None':
      # need to quote dirty chars in filename it if it is to be safely included in a url
      # filename=urllib.parse.quote(filename) # approach doesn't work, as it gets decoded back in webpage
      filename=rmutils.clean_filename(filename)
      if not filename:
        # seems unlikely, but in case of unicode or other weird filenames
        filename='FileNumber'+str(partno+1) 
      if content_type in good_types:
        if not os.path.isdir(save_dir):
          os.mkdir(save_dir)
          os.system('chmod 755 '+save_dir)
        # now need to ensure filename is unique to save_dir
        fprefix=1
        while True:
          ufname=str(fprefix)+filename # unique filename we test
          pathname=os.path.join(save_dir,ufname)
          if os.path.isfile(pathname):
            fprefix+=1
          else:
            break
        reference=refstart+ufname # use to build URL for reference
        fp = open(pathname,'wb')
        fp.write(part.get_payload(decode=True))
        fp.close()
        real_mime=mimetype_from_file(pathname)
        if real_mime != content_type:
          # no valid email client will do that. Treat as malicious
          os.remove(pathname)
          log('removed: '+content_type+' not '+real_mime+' path: '+pathname)
          reject+='Bad content type: '+real_mime+' not as claimed '+content_type+'\n'
          log('Bad content type: '+real_mime+' not as claimed '+content_type+'\n')
        else: # claimed and measured content types correspond so OK
          log('saved: '+content_type+' to '+pathname+' ref: '+reference)
          refsection+='Type: '+content_type+' '+reference+' \n'
          os.system('chmod 644 '+pathname)
          attached+=1
      else:
        reject+='Bad content type: '+content_type+'\n'
        log('Bad content type: '+content_type+'\n')
    elif content_type == 'text/html' :
      pass # if text not also in text/plain can reject later
    elif content_type == 'text/plain' :
      # don't increment attached - if none, we do old school output
      plaintext+=1 # count plaintext parts
      body=body+part.get_payload()
    partno+=1 # keep count of attachments
  if body == '': # empty body text treated as error condition
    reject+='message missing body text\n'
  if attached: # add references to downloads to end of body text
    body+=refsection
  toreturn=(body,attached,plaintext,reject) # returned values tuple
  # log('(body,attached,plaintext,reject): '+str(toreturn))
  return toreturn 

if __name__ == "__main__":
    argv=sys.argv
    main(argv)
