# -*- coding: utf-8 -*-
# BSD 2-Clause License
#
# Apprise - Push Notification Library.
# Copyright (c) 2024, Chris Caron <lead2gold@gmail.com>
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
import re
import os
import requests
import threading
from tempfile import NamedTemporaryFile
from . AttachBase import AttachBase
from . . common import ContentLocation
from . . URLBase import PrivacyMode
from . . AppriseLocale import gettext_lazy as _
class AttachHTTP(AttachBase):
    """
    A wrapper for HTTP based attachment sources

    The remote content is streamed into a temporary file on disk when
    download() is called.  invalidate() closes and removes that temporary
    file again; it is also invoked when the object is garbage collected.
    """

    # The default descriptive name associated with the service
    service_name = _('Web Based')

    # The default protocol
    protocol = 'http'

    # The default secure protocol
    secure_protocol = 'https'

    # The number of bytes in memory to read from the remote source at a time
    chunk_size = 8192

    # Web based requests are remote/external to our current location
    location = ContentLocation.HOSTED

    # thread safe loading; defined on the class, so the very same lock is
    # shared by every AttachHTTP instance
    _lock = threading.Lock()

    def __init__(self, headers=None, **kwargs):
        """
        Initialize HTTP Object

        headers can be a dictionary of key/value pairs that you want to
        additionally include as part of the server headers to post with

        All remaining keyword arguments are handed to the parent class;
        'fullpath' and 'qsd' are additionally read from them here.
        """
        super().__init__(**kwargs)

        self.schema = 'https' if self.secure else 'http'

        # Fall back to the root path when no (string) path was provided
        self.fullpath = kwargs.get('fullpath')
        if not isinstance(self.fullpath, str):
            self.fullpath = '/'

        self.headers = {}
        if headers:
            # Store our extra headers
            self.headers.update(headers)

        # Where our content is written to upon a call to download.
        self._temp_file = None

        # Our Query String Dictionary; we use this to track arguments
        # specified that aren't otherwise part of this class
        self.qsd = {
            k: v for k, v in kwargs.get('qsd', {}).items()
            if k not in self.template_args}

    def download(self, **kwargs):
        """
        Perform retrieval of the remote attachment based on the specified
        request and stream it into a temporary file.

        Returns True when the content is available locally afterwards
        (freshly fetched, or already retrieved by a concurrent thread).
        Returns False when the location is inaccessible, the content exceeds
        max_file_size, the request fails, or the temporary file could not be
        written.
        """
        if self.location == ContentLocation.INACCESSIBLE:
            # our content is inaccessible
            return False

        # prepare header
        headers = {
            'User-Agent': self.app_id,
        }

        # Apply any/all header over-rides defined
        headers.update(self.headers)

        auth = None
        if self.user:
            auth = (self.user, self.password)

        url = '%s://%s' % (self.schema, self.host)
        if isinstance(self.port, int):
            url += ':%d' % self.port

        url += self.fullpath

        # Where our request object will temporarily live.
        r = None

        # Always call throttle before any remote server i/o is made
        self.throttle()

        with self._lock:
            if self.exists(retrieve_if_missing=False):
                # Due to locking; it's possible a concurrent thread already
                # handled the retrieval in which case we can safely move on
                self.logger.trace(
                    'HTTP Attachment %s already retrieved',
                    self._temp_file.name)
                return True

            # Ensure any existing content set has been invalidated
            self.invalidate()

            self.logger.debug(
                'HTTP Attachment Fetch URL: %s (cert_verify=%r)',
                url, self.verify_certificate)

            try:
                # Make our request
                with requests.get(
                        url,
                        headers=headers,
                        auth=auth,
                        params=self.qsd,
                        verify=self.verify_certificate,
                        timeout=self.request_timeout,
                        stream=True) as r:

                    # Handle Errors
                    r.raise_for_status()

                    # Get our file-size (if known)
                    try:
                        file_size = int(r.headers.get('Content-Length', '0'))

                    except (TypeError, ValueError):
                        # Handle edge case where Content-Length is a bad
                        # value
                        file_size = 0

                    # Perform a little Q/A on file limitations and
                    # restrictions
                    if self.max_file_size > 0 and \
                            file_size > self.max_file_size:

                        # The content retrieved is too large
                        self.logger.error(
                            'HTTP response exceeds allowable maximum file '
                            'length ({}KB): {}'.format(
                                int(self.max_file_size / 1024),
                                self.url(privacy=True)))

                        # Return False (signifying a failure); nothing was
                        # written to disk yet so there is nothing to undo
                        return False

                    # Track the mime type reported by the remote server
                    self.detected_mimetype = r.headers.get('Content-Type')

                    # Track the filename reported by the remote server (if
                    # one was provided)
                    d = r.headers.get('Content-Disposition', '')
                    result = re.search(
                        "filename=['\"]?(?P<name>[^'\"]+)['\"]?", d, re.I)
                    if result:
                        self.detected_name = result.group('name').strip()

                    # Create a temporary file to work with; delete must be
                    # set to False or it isn't compatible with Microsoft
                    # Windows instances. In lieu of this, __del__ will clean
                    # up the file for us.
                    self._temp_file = NamedTemporaryFile(delete=False)

                    # If we get here, we can now safely write our content to
                    # disk
                    for chunk in r.iter_content(chunk_size=self.chunk_size):
                        # filter out keep-alive chunks
                        if not chunk:
                            continue

                        self._temp_file.write(chunk)

                        # Track all bytes written to disk
                        bytes_written = self._temp_file.tell()

                        # Prevent a case where Content-Length isn't
                        # provided (or understated). The limit is verified
                        # after every chunk written, so the transfer is
                        # aborted by the first chunk that crosses it.
                        if self.max_file_size > 0 and \
                                bytes_written > self.max_file_size:

                            # The content retrieved is too large
                            self.logger.error(
                                'HTTP response exceeds allowable '
                                'maximum file length '
                                '({}KB): {}'.format(
                                    int(self.max_file_size / 1024),
                                    self.url(privacy=True)))

                            # Invalidate any variables previously set
                            self.invalidate()

                            # Return False (signifying a failure)
                            return False

                    # Ensure our content is flushed to disk for
                    # post-processing
                    self._temp_file.flush()

                # Set our minimum requirements for a successful download()
                # call
                self.download_path = self._temp_file.name
                if not self.detected_name:
                    self.detected_name = os.path.basename(self.fullpath)

            except requests.RequestException as e:
                self.logger.error(
                    'A Connection error occurred retrieving HTTP '
                    'configuration from %s.' % self.host)
                self.logger.debug('Socket Exception: %s', str(e))

                # Invalidate any variables previously set
                self.invalidate()

                # Return False (signifying a failure)
                return False

            except (IOError, OSError):
                # IOError is present for backwards compatibility with Python
                # versions older then 3.3.  >= 3.3 throw OSError now.

                # Could not open and/or write the temporary file
                self.logger.error(
                    'Could not write attachment to disk: {}'.format(
                        self.url(privacy=True)))

                # Invalidate any variables previously set
                self.invalidate()

                # Return False (signifying a failure)
                return False

        # Return our success
        return True

    def invalidate(self):
        """
        Close and remove our temporary file (if one exists) and then let
        the parent class reset its own state.
        """
        # getattr() covers the case where __del__ fires on an instance whose
        # __init__ never reached the _temp_file assignment (for example
        # because the parent initialization raised first)
        temp_file = getattr(self, '_temp_file', None)
        if temp_file:
            self.logger.trace(
                'Attachment cleanup of %s', temp_file.name)
            temp_file.close()

            try:
                # Ensure our file is removed (if it exists)
                os.unlink(temp_file.name)

            except OSError:
                # Best effort only; the file may already be gone
                pass

            # Reset our temporary file to prevent from entering
            # this block again
            self._temp_file = None

        super().invalidate()

    def __del__(self):
        """
        Tidy up our temporary file (if still present) when the object is
        destroyed.
        """
        with self._lock:
            self.invalidate()

    def url(self, privacy=False, *args, **kwargs):
        """
        Returns the URL built dynamically based on specified arguments.

        When privacy is set, the password is passed through pprint() in
        PrivacyMode.Secret instead of being emitted as-is.
        """

        # Our URL parameters
        params = self.url_parameters(privacy=privacy, *args, **kwargs)

        # Prepare our cache value
        if self.cache is not None:
            if isinstance(self.cache, bool) or not self.cache:
                cache = 'yes' if self.cache else 'no'
            else:
                cache = int(self.cache)

            # Set our cache value
            params['cache'] = cache

        if self._mimetype:
            # A format was enforced
            params['mime'] = self._mimetype

        if self._name:
            # A name was enforced
            params['name'] = self._name

        # Append our headers into our parameters; each is prefixed with a
        # plus (+) which is how parse_url() (via 'qsd+') identifies them
        params.update({'+{}'.format(k): v for k, v in self.headers.items()})

        # Apply any remaining entries to our URL
        params.update(self.qsd)

        # Determine Authentication
        auth = ''
        if self.user and self.password:
            auth = '{user}:{password}@'.format(
                user=self.quote(self.user, safe=''),
                password=self.pprint(
                    self.password, privacy, mode=PrivacyMode.Secret, safe=''),
            )
        elif self.user:
            auth = '{user}@'.format(
                user=self.quote(self.user, safe=''),
            )

        # The port is only written to the URL if it differs from the default
        # associated with the schema
        default_port = 443 if self.secure else 80

        return '{schema}://{auth}{hostname}{port}{fullpath}?{params}'.format(
            schema=self.secure_protocol if self.secure else self.protocol,
            auth=auth,
            hostname=self.quote(self.host, safe=''),
            port='' if self.port is None or self.port == default_port
                 else ':{}'.format(self.port),
            fullpath=self.quote(self.fullpath, safe='/'),
            params=self.urlencode(params),
        )

    @staticmethod
    def parse_url(url):
        """
        Parses the URL and returns enough arguments that can allow
        us to re-instantiate this object.

        The (falsy) result of AttachBase.parse_url() is returned unchanged
        if the URL could not be parsed.
        """
        results = AttachBase.parse_url(url)
        if not results:
            # We're done early as we couldn't load the results
            return results

        # Add our headers that the user can potentially over-ride if they wish
        # to to our returned result set.  A copy is made so that merging the
        # '+' entries does not modify results['qsd-'] as a side effect.
        headers = dict(results['qsd-'])
        headers.update(results['qsd+'])
        results['headers'] = headers

        return results