You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
416 lines
17 KiB
416 lines
17 KiB
2 years ago
|
###############################################################################
|
||
|
# Google Analytics 4 Measurement Protocol for Python
|
||
|
# Copyright (c) 2022, Adswerve
|
||
|
#
|
||
|
# This project is free software, distributed under the BSD license.
|
||
|
# Adswerve offers consulting and integration services if your firm needs
|
||
|
# assistance in strategy, implementation, or auditing existing work.
|
||
|
###############################################################################
|
||
|
|
||
|
import json
|
||
|
import logging
|
||
|
import urllib.request
|
||
|
import time
|
||
|
import datetime
|
||
|
import random
|
||
|
from ga4mp.utils import params_dict
|
||
|
from ga4mp.event import Event
|
||
|
from ga4mp.store import BaseStore, DictStore
|
||
|
|
||
|
import os, sys
|
||
|
# Append the directory one level above this module's directory to the import
# search path.
sys.path.append(
    os.path.normpath(os.path.join(os.path.dirname(__file__), os.pardir))
)

# Module-level logger, named after this module, emitting INFO and above.
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
|
||
|
|
||
|
class BaseGa4mp(object):
    """
    Parent class that provides an interface for sending data to Google Analytics, supporting the GA4 Measurement Protocol.

    This class is not usable directly: subclasses must implement ``_build_url`` and ``_build_request``.

    Parameters
    ----------
    api_secret : string
        Generated through the Google Analytics UI. To create a new secret, navigate in the Google Analytics UI to: Admin > Data Streams >
        [choose your stream] > Measurement Protocol API Secrets > Create
    store : BaseStore, optional
        Object used to read and write session parameters and user properties. When omitted, a new ``DictStore`` is created.

    See Also
    --------

    * Measurement Protocol (Google Analytics 4): https://developers.google.com/analytics/devguides/collection/protocol/ga4

    Examples
    --------
    # Initialize tracking object for gtag usage
    >>> ga = GtagMP(api_secret = "API_SECRET", measurement_id = "MEASUREMENT_ID", client_id="CLIENT_ID")

    # Initialize tracking object for Firebase usage
    >>> ga = FirebaseMP(api_secret = "API_SECRET", firebase_app_id = "FIREBASE_APP_ID", app_instance_id="APP_INSTANCE_ID")

    # Build an event
    >>> event_type = 'new_custom_event'
    >>> event_parameters = {'parameter_key_1': 'parameter_1', 'parameter_key_2': 'parameter_2'}
    >>> event = {'name': event_type, 'params': event_parameters }
    >>> events = [event]

    # Send a custom event to GA4 immediately
    >>> ga.send(events)

    # Postponed send of a custom event to GA4
    >>> ga.send(events, postpone=True)
    >>> ga.postponed_send()
    """

    def __init__(self, api_secret, store: "BaseStore | None" = None):
        """
        Set up the API secret, the event queue, the store and the endpoint URLs.

        Raises
        ------
        AssertionError
            If ``store`` is supplied but is not an instance of ``BaseStore``.
        """
        self._initialization_time = time.time()  # used for both session_id and calculating engagement time
        self.api_secret = api_secret
        self._event_list = []  # events queued by send(..., postpone=True)
        assert store is None or isinstance(store, BaseStore), "if supplied, store must be an instance of BaseStore"
        # Compare against None explicitly: a caller-supplied store that happens to be
        # falsy (e.g. an empty container-like store) must not be silently replaced.
        self.store = store if store is not None else DictStore()
        self._check_store_requirements()
        self._base_domain = "https://www.google-analytics.com/mp/collect"
        self._validation_domain = "https://www.google-analytics.com/debug/mp/collect"

    def _check_store_requirements(self):
        """
        Ensure the store holds the "session_id" and "last_interaction_time_msec" session parameters.

        An existing "session_id" is kept; "last_interaction_time_msec" is reset to the initialization time.
        """
        # Store must contain "session_id" and "last_interaction_time_msec" in order for tracking to work properly.
        if self.store.get_session_parameter("session_id") is None:
            self.store.set_session_parameter(name="session_id", value=int(self._initialization_time))
        # Note: "last_interaction_time_msec" factors into the required "engagement_time_msec" event parameter.
        self.store.set_session_parameter(name="last_interaction_time_msec", value=int(self._initialization_time * 1000))

    def create_new_event(self, name):
        """
        Return a new ``Event`` object with the given name.
        """
        return Event(name=name)

    def send(self, events, validation_hit=False, postpone=False, date=None):
        """
        Method to send an http post request to google analytics with the specified events.

        The event dictionaries are modified in place: "session_id" and "engagement_time_msec" are added to
        their params when missing, and postponed events additionally receive a "_timestamp_micros" key.

        Parameters
        ----------
        events : List[Dict]
            A list of dictionaries of the events to be sent to Google Analytics. The list of dictionaries should adhere
            to the following format:

            [{'name': 'level_end',
                'params' : {'level_name': 'First',
                            'success': 'True'}
            },
            {'name': 'level_up',
                'params': {'character': 'John Madden',
                            'level': 'First'}
            }]

        validation_hit : bool, optional
            Boolean to depict if events should be tested against the Measurement Protocol Validation Server, by default False
        postpone : bool, optional
            Boolean to depict if provided event list should be postponed, by default False
        date : datetime
            Python datetime object for sending a historical event at the given date. Date cannot be in the future.
            Not used when ``postpone`` is True (postponed events are stamped with the current time instead).

        Raises
        ------
        AssertionError
            If ``events`` is malformed or ``date`` is in the future.
        """

        # check for any missing or invalid parameters among automatically collected and recommended event types
        self._check_params(events)
        self._check_date_not_in_future(date)
        self._add_session_id_and_engagement_time(events)

        if postpone is True:
            # build event list to send later, remembering when each event was queued
            for event in events:
                event["_timestamp_micros"] = self._get_timestamp(time.time())
                self._event_list.append(event)
        else:
            # batch events into sets of 25 events, the maximum allowed.
            batched_event_list = [
                events[start : start + 25] for start in range(0, len(events), 25)
            ]
            # send http post request
            self._http_post(
                batched_event_list, validation_hit=validation_hit, date=date
            )

    def postponed_send(self):
        """
        Method to send the events provided to Ga4mp.send(events,postpone=True)

        Each queued event is posted as its own request; the queue is emptied afterwards.
        """

        for event in self._event_list:
            self._http_post([event], postpone=True)

        # clear event_list for future use
        self._event_list = []

    def append_event_to_params_dict(self, new_name_and_parameters):
        """
        Method to append event name and parameters key-value pairing(s) to parameters dictionary.

        Parameters
        ----------
        new_name_and_parameters : Dict
            A dictionary with one key-value pair representing a new type of event to be sent to Google Analytics.
            The dictionary should adhere to the following format:

            {'new_name': ['new_param_1', 'new_param_2', 'new_param_3']}
        """

        params_dict.update(new_name_and_parameters)

    def _http_post(self, batched_event_list, validation_hit=False, postpone=False, date=None):
        """
        Method to send http POST request to google-analytics.

        Parameters
        ----------
        batched_event_list : List[List[Dict]]
            List of List of events. Places initial event payload into a list to send http POST in batches.
            When ``postpone`` is truthy, each item is instead a single event dictionary carrying a
            "_timestamp_micros" key.
        validation_hit : bool, optional
            Boolean to depict if events should be tested against the Measurement Protocol Validation Server, by default False
        postpone : bool, optional
            Boolean to depict if provided event list should be postponed, by default False
        date : datetime
            Python datetime object for sending a historical event at the given date. Date cannot be in the future.
            Timestamp micros supports up to 48 hours of backdating.
            If date is specified, postpone must be False or an assertion will be thrown.

        Returns
        -------
        int or None
            HTTP status code of the last request made, or None if ``batched_event_list`` was empty.
        """
        self._check_date_not_in_future(date)
        status_code = None  # Default set to know if batch loop does not work and to bound status_code

        # set domain
        domain = self._base_domain
        if validation_hit is True:
            domain = self._validation_domain
        logger.info(f"Sending POST to: {domain}")

        # loop through events in batches of 25
        for batch_number, batch in enumerate(batched_event_list, start=1):
            # url and request slightly differ by subclass
            url = self._build_url(domain=domain)
            request = self._build_request(batch=batch)
            self._add_user_props_to_hit(request)

            # make adjustments for postponed hit
            if postpone:
                # Drop the private "_timestamp_micros" key and keep "events" a list:
                # the Measurement Protocol payload expects an array of events.
                request["events"] = [{"name": batch["name"], "params": batch["params"]}]
            else:
                request["events"] = batch

            if date is not None:
                logger.info(f"Setting event timestamp to: {date}")
                assert (
                    postpone is False
                ), "Cannot send postponed historical hit, ensure postpone=False"

                ts = self._datetime_to_timestamp(date)
                ts_micro = self._get_timestamp(ts)
                request["timestamp_micros"] = int(ts_micro)
                logger.info(f"Timestamp of request is: {request['timestamp_micros']}")

            if postpone:
                # add timestamp to hit
                request["timestamp_micros"] = batch["_timestamp_micros"]

            req = urllib.request.Request(url)
            req.add_header("Content-Type", "application/json; charset=utf-8")
            jsondata = json.dumps(request)
            json_data_as_bytes = jsondata.encode("utf-8")  # needs to be bytes
            req.add_header("Content-Length", len(json_data_as_bytes))
            # Use the response as a context manager so the connection is always closed.
            with urllib.request.urlopen(req, json_data_as_bytes) as result:
                status_code = result.status

            logger.info(f"Batch Number: {batch_number}")
            logger.info(f"Status code: {status_code}")

        return status_code

    def _check_params(self, events):
        """
        Method to check whether the provided event payload parameters align with supported parameters.

        Structural problems raise ``AssertionError``; a parameter listed in ``params_dict`` for the
        event name but missing from the event only triggers a logged warning.

        Parameters
        ----------
        events : List[Dict]
            A list of dictionaries of the events to be sent to Google Analytics. The list of dictionaries should adhere
            to the following format:

            [{'name': 'level_end',
                'params' : {'level_name': 'First',
                            'success': 'True'}
            },
            {'name': 'level_up',
                'params': {'character': 'John Madden',
                            'level': 'First'}
            }]
        """

        # check to make sure it's a list of dictionaries with the right keys
        assert isinstance(events, list), "events should be a list"

        for event in events:
            assert isinstance(event, dict), "each event should be an instance of a dictionary"
            assert "name" in event, 'each event should have a "name" key'
            assert "params" in event, 'each event should have a "params" key'

        # check for any missing or invalid parameters
        # (done in a second pass so nothing is logged if the structure check fails)
        for e in events:
            event_name = e["name"]
            event_params = e["params"]
            if event_name in params_dict:
                for parameter in params_dict[event_name]:
                    if parameter not in event_params:
                        logger.warning(
                            f"WARNING: Event parameters do not match event type.\nFor {event_name} event type, the correct parameter(s) are {params_dict[event_name]}.\nThe parameter '{parameter}' triggered this warning.\nFor a breakdown of currently supported event types and their parameters go here: https://support.google.com/analytics/answer/9267735\n"
                        )

    def _add_session_id_and_engagement_time(self, events):
        """
        Method to add the session_id and engagement_time_msec parameter to all events.

        Values already present in an event's params are left untouched. Whenever an engagement time is
        computed, the store's "last_interaction_time_msec" is moved forward to the current time.
        """
        for event in events:
            current_time_in_milliseconds = int(time.time() * 1000)

            event_params = event["params"]
            if "session_id" not in event_params:
                event_params["session_id"] = self.store.get_session_parameter("session_id")
            if "engagement_time_msec" not in event_params:
                last_interaction_time = self.store.get_session_parameter("last_interaction_time_msec")
                # never report a negative engagement time
                event_params["engagement_time_msec"] = max(current_time_in_milliseconds - last_interaction_time, 0)
                self.store.set_session_parameter(name="last_interaction_time_msec", value=current_time_in_milliseconds)

    def _add_user_props_to_hit(self, hit):
        """
        Method is a helper function to add user properties to outgoing hits.

        "user_id" and "non_personalized_ads" are written at the top level of the hit; every other
        property is written as ``{"value": ...}`` under the hit's "user_properties" key. A property that
        cannot be added is logged and skipped.

        Parameters
        ----------
        hit : dict
            The request payload, modified in place.
        """

        for key in self.store.get_all_user_properties():
            try:
                if key in ["user_id", "non_personalized_ads"]:
                    hit[key] = self.store.get_user_property(key)
                else:
                    user_properties = hit.setdefault("user_properties", {})
                    user_properties[key] = {"value": self.store.get_user_property(key)}
            except Exception:
                # Best effort: skip this property, but let KeyboardInterrupt/SystemExit propagate.
                logger.info(f"Failed to add user property to outgoing hit: {key}")

    def _get_timestamp(self, timestamp):
        """
        Method returns UNIX timestamp in microseconds for postponed hits.

        Parameters
        ----------
        timestamp : float
            UNIX timestamp in seconds.

        Returns
        -------
        int
            The timestamp in microseconds, truncated to an integer.
        """
        return int(timestamp * 1e6)

    def _datetime_to_timestamp(self, dt):
        """
        Private method to convert a datetime object into a timestamp

        Parameters
        ----------
        dt : datetime
            A datetime object. Naive datetimes are interpreted as local time; timezone-aware
            datetimes are converted using their own offset.

        Returns
        -------
        timestamp
            A UNIX timestamp in seconds (float, including the fractional second)
        """
        # datetime.timestamp() keeps microseconds and honours tzinfo, unlike
        # time.mktime(dt.timetuple()) which discards both.
        return dt.timestamp()

    def _check_date_not_in_future(self, date):
        """
        Method to check that provided date is not in the future.

        Parameters
        ----------
        date : datetime
            Python datetime object, or None to skip the check.

        Raises
        ------
        AssertionError
            If ``date`` is later than the current time.
        """
        if date is None:
            return

        # Build "now" with the same awareness as `date`: comparing a timezone-aware
        # datetime against a naive one raises TypeError.
        now = datetime.datetime.now(tz=date.tzinfo)
        assert (
            date <= now
        ), "Provided date cannot be in the future"

    def _build_url(self, domain):
        """
        Return the full request URL for ``domain``; must be implemented by subclasses.
        """
        raise NotImplementedError("Subclass should be using this function, but it was called through the base class instead.")

    def _build_request(self, batch):
        """
        Return the request payload dictionary for ``batch``; must be implemented by subclasses.
        """
        raise NotImplementedError("Subclass should be using this function, but it was called through the base class instead.")
|
||
|
|
||
|
class GtagMP(BaseGa4mp):
    """
    Subclass for users of gtag. See `BaseGa4mp` parent class for examples.

    Parameters
    ----------
    api_secret : string
        Measurement Protocol API secret, passed through to the parent class.
    measurement_id : string
        The identifier for a Data Stream. Found in the Google Analytics UI under: Admin > Data Streams > [choose your stream] > Measurement ID (top-right)
    client_id : string
        A unique identifier for a client, representing a specific browser/device.
    store : BaseStore, optional
        Passed through to the parent class; when omitted the parent creates its default store.
    """

    def __init__(self, api_secret, measurement_id, client_id, store=None):
        super().__init__(api_secret, store=store)
        self.measurement_id = measurement_id
        self.client_id = client_id

    def _build_url(self, domain):
        """
        Return ``domain`` with the measurement id and API secret as query parameters.
        """
        return f"{domain}?measurement_id={self.measurement_id}&api_secret={self.api_secret}"

    def _build_request(self, batch):
        """
        Return the request payload: this client's id plus the given events.
        """
        return {"client_id": self.client_id, "events": batch}

    @staticmethod
    def random_client_id():
        """
        Utility function for generating a new client ID matching the typical format of 10 random digits and the UNIX timestamp in seconds, joined by a period.

        Declared static so an ID can be generated before an instance exists
        (the constructor itself requires a client ID); calling it on an instance still works.
        """
        random_digits = random.randint(0, 9999999999)
        return f"{random_digits:010d}.{int(time.time())}"
|
||
|
|
||
|
class FirebaseMP(BaseGa4mp):
    """
    Subclass for users of Firebase. See `BaseGa4mp` parent class for examples.

    Parameters
    ----------
    api_secret : string
        Measurement Protocol API secret, passed through to the parent class.
    firebase_app_id : string
        The identifier for a Firebase app. Found in the Firebase console under: Project Settings > General > Your Apps > App ID.
    app_instance_id : string
        A unique identifier for a Firebase app instance.
        * Android - getAppInstanceId() - https://firebase.google.com/docs/reference/android/com/google/firebase/analytics/FirebaseAnalytics#public-taskstring-getappinstanceid
        * Kotlin - getAppInstanceId() - https://firebase.google.com/docs/reference/kotlin/com/google/firebase/analytics/FirebaseAnalytics#getappinstanceid
        * Swift - appInstanceID() - https://firebase.google.com/docs/reference/swift/firebaseanalytics/api/reference/Classes/Analytics#appinstanceid
        * Objective-C - appInstanceID - https://firebase.google.com/docs/reference/ios/firebaseanalytics/api/reference/Classes/FIRAnalytics#+appinstanceid
        * C++ - GetAnalyticsInstanceId() - https://firebase.google.com/docs/reference/cpp/namespace/firebase/analytics#getanalyticsinstanceid
        * Unity - GetAnalyticsInstanceIdAsync() - https://firebase.google.com/docs/reference/unity/class/firebase/analytics/firebase-analytics#getanalyticsinstanceidasync
    store : BaseStore, optional
        Passed through to the parent class; when omitted the parent creates its default store.
    """

    def __init__(self, api_secret, firebase_app_id, app_instance_id, store=None):
        super().__init__(api_secret, store=store)
        self.firebase_app_id = firebase_app_id
        self.app_instance_id = app_instance_id

    def _build_url(self, domain):
        """
        Return ``domain`` with the Firebase app id and API secret as query parameters.
        """
        return f"{domain}?firebase_app_id={self.firebase_app_id}&api_secret={self.api_secret}"

    def _build_request(self, batch):
        """
        Return the request payload: this app instance's id plus the given events.
        """
        return {"app_instance_id": self.app_instance_id, "events": batch}
|