Experimental pageleave events (#4624)

* add experimental pageleave script variant

* also send pageleave events on SPA navigation

* disallow goals with 'pageleave' event name

* do not count pageleaves towards the event metric

* remove duplication in test file

* do not update sessions on pageleave events

* ignore pageleaves in the current time_on_page implementation

* make pageleave events not billable

* rename function

* Prevent multiple pageleaves being sent at the same time
This commit is contained in:
RobertJoonas
2024-09-30 14:52:40 +02:00
committed by GitHub
parent 714c036757
commit 5c72de0155
18 changed files with 331 additions and 125 deletions

View File

@ -15,9 +15,9 @@ defmodule PlausibleWeb.Dogfood do
# Returns the URL of the tracker script used for dogfooding.
#
# The "prod" and "staging" environments get the
# `manual.pageview-props.tagged-events.pageleave` script; every other
# environment gets the same combination with the `local` variant added.
def script_url() do
  # Only the `pageleave` variants are returned. The superseded non-pageleave
  # URL literals were bare expressions whose values were discarded.
  if Application.get_env(:plausible, :environment) in ["prod", "staging"] do
    "#{PlausibleWeb.Endpoint.url()}/js/script.manual.pageview-props.tagged-events.pageleave.js"
  else
    "#{PlausibleWeb.Endpoint.url()}/js/script.local.manual.pageview-props.tagged-events.pageleave.js"
  end
end

View File

@ -72,25 +72,43 @@ defmodule Plausible.Goal do
end
# Requires the changeset to carry either a valid page path or a valid event
# name.
#
# The page path takes precedence: when it is valid only `:page_path` is
# trimmed; otherwise a valid event name results in only `:event_name` being
# trimmed. When neither is valid, each field receives the error message
# returned by its own validator.
defp validate_event_name_and_page_path(changeset) do
  case {validate_page_path(changeset), validate_event_name(changeset)} do
    {:ok, _} ->
      update_change(changeset, :page_path, &String.trim/1)

    {_, :ok} ->
      update_change(changeset, :event_name, &String.trim/1)

    {{:error, page_path_error}, {:error, event_name_error}} ->
      changeset
      |> add_error(:event_name, event_name_error)
      |> add_error(:page_path, page_path_error)
  end
end
# Checks the `:page_path` field of the changeset.
#
# Returns `:ok` when the field is set and begins with "/", otherwise
# `{:error, message}` with the message to attach to the field.
defp validate_page_path(changeset) do
  value = get_field(changeset, :page_path)

  # `String.starts_with?/2` is equivalent to the previous `~r/^\/.*/` match.
  if value && String.starts_with?(value, "/") do
    :ok
  else
    {:error, "this field is required and must start with a /"}
  end
end
# Checks the `:event_name` field of the changeset.
#
# Returns `:ok` for a non-empty name, and `{:error, message}` when the name is
# missing or is the reserved `pageleave` event name.
defp validate_event_name(changeset) do
  value = get_field(changeset, :event_name)

  cond do
    # Compare the trimmed value: a valid event name is trimmed after
    # validation, so " pageleave " would otherwise be accepted here and then
    # stored as the reserved name "pageleave".
    is_binary(value) and String.trim(value) == "pageleave" ->
      {:error, "The event name 'pageleave' is reserved and cannot be used as a goal"}

    value && String.match?(value, ~r/^.+/) ->
      :ok

    true ->
      {:error, "this field is required and cannot be blank"}
  end
end
defp maybe_drop_currency(changeset) do

View File

@ -34,6 +34,8 @@ defmodule Plausible.Ingestion.Event do
| :site_page_blocklist
| :site_hostname_allowlist
| :verification_agent
| :lock_timeout
| :no_session_for_pageleave
@type t() :: %__MODULE__{
domain: String.t() | nil,
@ -376,6 +378,9 @@ defmodule Plausible.Ingestion.Event do
| clickhouse_event: ClickhouseEventV2.merge_session(event.clickhouse_event, session)
}
{:error, :no_session_for_pageleave} ->
drop(event, :no_session_for_pageleave)
{:error, :timeout} ->
drop(event, :lock_timeout)
end

View File

@ -8,7 +8,26 @@ defmodule Plausible.Session.CacheStore do
def lock_telemetry_event, do: @lock_telemetry_event
def on_event(event, session_attributes, prev_user_id, buffer_insert \\ &WriteBuffer.insert/1) do
def on_event(event, session_attributes, prev_user_id, buffer_insert \\ &WriteBuffer.insert/1)
def on_event(%{name: "pageleave"} = event, _, prev_user_id, _) do
  # Pageleave tracking is still experimental. A real implementation would
  # probably update the session as well (e.g. `is_bounce` or `duration`), but
  # for now the only goal is to measure the success rate of pageleave events.
  # The cached session is therefore looked up and returned unchanged so the
  # event can be inserted with its session ID. When no session is cached for
  # either the current or the previous user ID, an error is returned and the
  # event gets dropped.
  case find_session(event, event.user_id) || find_session(event, prev_user_id) do
    missing when missing in [nil, false] -> {:error, :no_session_for_pageleave}
    session -> {:ok, session}
  end
end
def on_event(event, session_attributes, prev_user_id, buffer_insert) do
lock_requested_at = System.monotonic_time()
Plausible.Cache.Adapter.with_lock(

View File

@ -50,6 +50,7 @@ defmodule Plausible.Stats.Aggregate do
defp aggregate_time_on_page(site, query) do
windowed_pages_q =
from e in base_event_query(site, Query.remove_top_level_filters(query, ["event:page"])),
where: e.name != "pageleave",
select: %{
next_timestamp: over(fragment("leadInFrame(?)", e.timestamp), :event_horizon),
next_pathname: over(fragment("leadInFrame(?)", e.pathname), :event_horizon),

View File

@ -93,6 +93,7 @@ defmodule Plausible.Stats.Breakdown do
site,
Query.remove_top_level_filters(query, ["event:page", "event:props"])
),
where: e.name != "pageleave",
select: %{
next_timestamp: over(fragment("leadInFrame(?)", e.timestamp), :event_horizon),
next_pathname: over(fragment("leadInFrame(?)", e.pathname), :event_horizon),

View File

@ -56,6 +56,7 @@ defmodule Plausible.Stats.Clickhouse do
ClickhouseRepo.one(
from(e in "events_v2",
where: e.site_id in ^site_ids,
where: e.name != "pageleave",
where: fragment("toDate(?)", e.timestamp) >= ^date_range.first,
where: fragment("toDate(?)", e.timestamp) <= ^date_range.last,
select: {

View File

@ -197,8 +197,8 @@ defmodule Plausible.Stats.SQL.Expression do
end
# `:events` metric: the number of events, scaled by the sample factor.
# Events named `pageleave` are excluded from the count via `countIf`.
def event_metric(:events) do
  wrap_alias([e], %{
    events: fragment("toUInt64(round(countIf(? != 'pageleave') * any(_sample_factor)))", e.name)
  })
end

View File

@ -12,7 +12,8 @@ defmodule PlausibleWeb.Tracker do
"file-downloads",
"pageview-props",
"tagged-events",
"revenue"
"revenue",
"pageleave"
]
# Generates Power Set of all variants

View File

@ -723,6 +723,27 @@ defmodule Plausible.Billing.QuotaTest do
} = Quota.Usage.monthly_pageview_usage(user)
end
# Billing usage must skip `pageleave` events: of the three events inserted
# below, only the custom event and the pageview are expected in the totals.
test "pageleave events are not counted towards monthly pageview usage" do
user = insert(:user) |> Plausible.Users.with_subscription()
site = insert(:site, members: [user])
now = NaiveDateTime.utc_now()
populate_stats(site, [
build(:event, timestamp: Timex.shift(now, days: -8), name: "custom"),
build(:pageview, user_id: 199, timestamp: Timex.shift(now, days: -5, minutes: -2)),
build(:event, user_id: 199, timestamp: Timex.shift(now, days: -5), name: "pageleave")
])
# total = 1 custom event + 1 pageview; the pageleave contributes nothing.
assert %{
last_30_days: %{
total: 2,
custom_events: 1,
pageviews: 1,
date_range: %{}
}
} = Quota.Usage.monthly_pageview_usage(user)
end
test "returns usage for user with subscription and a site" do
today = Date.utc_today()

View File

@ -55,6 +55,14 @@ defmodule Plausible.GoalsTest do
assert {"has already been taken", _} = changeset.errors[:event_name]
end
# `pageleave` is a reserved event name, so creating a goal for it must be
# rejected with a dedicated error on the `:event_name` field.
test "create/2 fails to create a goal with 'pageleave' as event_name (reserved)" do
site = insert(:site)
assert {:error, changeset} = Goals.create(site, %{"event_name" => "pageleave"})
assert {"The event name 'pageleave' is reserved and cannot be used as a goal", _} =
changeset.errors[:event_name]
end
@tag :ee_only
test "create/2 sets site.updated_at for revenue goal" do
site_1 = insert(:site, updated_at: DateTime.add(DateTime.utc_now(), -3600))

View File

@ -318,6 +318,22 @@ defmodule Plausible.Ingestion.EventTest do
assert dropped.drop_reason == :lock_timeout
end
# The pageleave below is the only event sent for this freshly inserted site,
# so no session can exist for it and the event is expected to be dropped
# rather than buffered.
test "drops pageleave event when no session found from cache" do
site = insert(:site)
payload = %{
name: "pageleave",
url: "https://#{site.domain}/123",
d: "#{site.domain}"
}
conn = build_conn(:post, "/api/events", payload)
assert {:ok, request} = Request.build(conn)
assert {:ok, %{buffered: [], dropped: [dropped]}} = Event.build_and_buffer(request)
assert dropped.drop_reason == :no_session_for_pageleave
end
@tag :ee_only
test "saves revenue amount" do
site = insert(:site)

View File

@ -3,6 +3,22 @@ defmodule Plausible.Session.CacheStoreTest do
alias Plausible.Session.CacheStore
# Session attributes passed to `CacheStore.on_event/4` by the tests in this
# module; kept in one place so the individual tests do not repeat the map.
@session_params %{
referrer: "ref",
referrer_source: "refsource",
utm_medium: "medium",
utm_source: "source",
utm_campaign: "campaign",
utm_content: "content",
utm_term: "term",
browser: "browser",
browser_version: "55",
country_code: "EE",
screen_size: "Desktop",
operating_system: "Mac",
operating_system_version: "11"
}
setup do
current_pid = self()
@ -40,23 +56,7 @@ defmodule Plausible.Session.CacheStoreTest do
event2 = build(:event, name: "pageview", user_id: event1.user_id, site_id: event1.site_id)
event3 = build(:event, name: "pageview", user_id: event1.user_id, site_id: event1.site_id)
session_params = %{
referrer: "ref",
referrer_source: "refsource",
utm_medium: "medium",
utm_source: "source",
utm_campaign: "campaign",
utm_content: "content",
utm_term: "term",
browser: "browser",
browser_version: "55",
country_code: "EE",
screen_size: "Desktop",
operating_system: "Mac",
operating_system_version: "11"
}
CacheStore.on_event(event1, session_params, nil, buffer)
CacheStore.on_event(event1, @session_params, nil, buffer)
assert_receive({:buffer, :insert, [[session1]]})
assert_receive({:telemetry_handled, duration})
@ -65,7 +65,7 @@ defmodule Plausible.Session.CacheStoreTest do
[event2, event3]
|> Enum.map(fn e ->
Task.async(fn ->
CacheStore.on_event(e, session_params, nil, slow_buffer)
CacheStore.on_event(e, @session_params, nil, slow_buffer)
end)
end)
|> Task.await_many()
@ -120,25 +120,9 @@ defmodule Plausible.Session.CacheStoreTest do
event2 = build(:event, name: "pageview", user_id: event1.user_id, site_id: event1.site_id)
event3 = build(:event, name: "pageview", user_id: event1.user_id, site_id: event1.site_id)
session_params = %{
referrer: "ref",
referrer_source: "refsource",
utm_medium: "medium",
utm_source: "source",
utm_campaign: "campaign",
utm_content: "content",
utm_term: "term",
browser: "browser",
browser_version: "55",
country_code: "EE",
screen_size: "Desktop",
operating_system: "Mac",
operating_system_version: "11"
}
async1 =
Task.async(fn ->
CacheStore.on_event(event1, session_params, nil, very_slow_buffer)
CacheStore.on_event(event1, @session_params, nil, very_slow_buffer)
end)
# Ensure next events are executed after processing event1 starts
@ -146,12 +130,12 @@ defmodule Plausible.Session.CacheStoreTest do
async2 =
Task.async(fn ->
CacheStore.on_event(event2, session_params, nil, buffer)
CacheStore.on_event(event2, @session_params, nil, buffer)
end)
async3 =
Task.async(fn ->
CacheStore.on_event(event3, session_params, nil, buffer)
CacheStore.on_event(event3, @session_params, nil, buffer)
end)
Task.await_many([async1, async2, async3])
@ -174,25 +158,9 @@ defmodule Plausible.Session.CacheStoreTest do
event2 = build(:event, name: "pageview")
event3 = build(:event, name: "pageview", user_id: event2.user_id, site_id: event2.site_id)
session_params = %{
referrer: "ref",
referrer_source: "refsource",
utm_medium: "medium",
utm_source: "source",
utm_campaign: "campaign",
utm_content: "content",
utm_term: "term",
browser: "browser",
browser_version: "55",
country_code: "EE",
screen_size: "Desktop",
operating_system: "Mac",
operating_system_version: "11"
}
async1 =
Task.async(fn ->
CacheStore.on_event(event1, session_params, nil, very_slow_buffer)
CacheStore.on_event(event1, @session_params, nil, very_slow_buffer)
end)
# Ensure next events are executed after processing event1 starts
@ -200,14 +168,14 @@ defmodule Plausible.Session.CacheStoreTest do
async2 =
Task.async(fn ->
CacheStore.on_event(event2, session_params, nil, buffer)
CacheStore.on_event(event2, @session_params, nil, buffer)
end)
Process.sleep(100)
async3 =
Task.async(fn ->
CacheStore.on_event(event3, session_params, nil, buffer)
CacheStore.on_event(event3, @session_params, nil, buffer)
end)
Task.await_many([async1, async2, async3])
@ -229,24 +197,8 @@ defmodule Plausible.Session.CacheStoreTest do
event = build(:event, name: "pageview")
session_params = %{
referrer: "ref",
referrer_source: "refsource",
utm_medium: "medium",
utm_source: "source",
utm_campaign: "campaign",
utm_term: "term",
utm_content: "content",
browser: "browser",
browser_version: "55",
country_code: "EE",
screen_size: "Desktop",
operating_system: "Mac",
operating_system_version: "11"
}
assert_raise RuntimeError, "boom", fn ->
CacheStore.on_event(event, session_params, nil, crashing_buffer)
CacheStore.on_event(event, @session_params, nil, crashing_buffer)
end
end
@ -258,23 +210,7 @@ defmodule Plausible.Session.CacheStoreTest do
"meta.value": ["true", "false"]
)
session_params = %{
referrer: "ref",
referrer_source: "refsource",
utm_medium: "medium",
utm_source: "source",
utm_campaign: "campaign",
utm_content: "content",
utm_term: "term",
browser: "browser",
browser_version: "55",
country_code: "EE",
screen_size: "Desktop",
operating_system: "Mac",
operating_system_version: "11"
}
CacheStore.on_event(event, session_params, nil, buffer)
CacheStore.on_event(event, @session_params, nil, buffer)
assert_receive({:buffer, :insert, [sessions]})
assert [session] = sessions
@ -289,19 +225,19 @@ defmodule Plausible.Session.CacheStoreTest do
assert session.duration == 0
assert session.pageviews == 1
assert session.events == 1
assert session.referrer == Map.get(session_params, :referrer)
assert session.referrer_source == Map.get(session_params, :referrer_source)
assert session.utm_medium == Map.get(session_params, :utm_medium)
assert session.utm_source == Map.get(session_params, :utm_source)
assert session.utm_campaign == Map.get(session_params, :utm_campaign)
assert session.utm_content == Map.get(session_params, :utm_content)
assert session.utm_term == Map.get(session_params, :utm_term)
assert session.country_code == Map.get(session_params, :country_code)
assert session.screen_size == Map.get(session_params, :screen_size)
assert session.operating_system == Map.get(session_params, :operating_system)
assert session.operating_system_version == Map.get(session_params, :operating_system_version)
assert session.browser == Map.get(session_params, :browser)
assert session.browser_version == Map.get(session_params, :browser_version)
assert session.referrer == Map.get(@session_params, :referrer)
assert session.referrer_source == Map.get(@session_params, :referrer_source)
assert session.utm_medium == Map.get(@session_params, :utm_medium)
assert session.utm_source == Map.get(@session_params, :utm_source)
assert session.utm_campaign == Map.get(@session_params, :utm_campaign)
assert session.utm_content == Map.get(@session_params, :utm_content)
assert session.utm_term == Map.get(@session_params, :utm_term)
assert session.country_code == Map.get(@session_params, :country_code)
assert session.screen_size == Map.get(@session_params, :screen_size)
assert session.operating_system == Map.get(@session_params, :operating_system)
assert session.operating_system_version == Map.get(@session_params, :operating_system_version)
assert session.browser == Map.get(@session_params, :browser)
assert session.browser_version == Map.get(@session_params, :browser_version)
assert session.timestamp == event.timestamp
assert session.start === event.timestamp
# assert Map.get(session, :"entry.meta.key") == ["logged_in", "darkmode"]
@ -326,6 +262,21 @@ defmodule Plausible.Session.CacheStoreTest do
assert session.events == 2
end
# A pageleave that follows a pageview must leave the session untouched: the
# session written for the pageview keeps its initial counters, and the
# pageleave itself must not write anything to the session buffer.
test "does not update session counters on pageleave event", %{buffer: buffer} do
  now = Timex.now()
  pageview = build(:pageview, timestamp: Timex.shift(now, seconds: -10))
  pageleave = %{pageview | name: "pageleave", timestamp: now}

  CacheStore.on_event(pageview, %{}, nil, buffer)
  CacheStore.on_event(pageleave, %{}, nil, buffer)

  # This is the session inserted for the pageview.
  assert_receive({:buffer, :insert, [[session]]})
  assert session.is_bounce == true
  assert session.duration == 0
  assert session.pageviews == 1
  assert session.events == 1

  # Without this check, a second insert carrying an updated session (caused
  # by the pageleave) would sit unread in the mailbox and the test would
  # still pass.
  refute_receive({:buffer, :insert, _})
end
describe "hostname-related attributes" do
test "initial for non-pageview" do
site_id = new_site_id()

View File

@ -1625,6 +1625,32 @@ defmodule PlausibleWeb.Api.ExternalStatsController.AggregateTest do
assert json_response(conn, 200)["results"] == %{"time_on_page" => %{"value" => nil}}
end
# The pageleave is the last event of the visit and comes after the `/2`
# pageview. It must not be treated as a following event for `/2`, so the
# time on page for `/2` is expected to stay nil.
test "pageleave events are ignored when querying time on page", %{conn: conn, site: site} do
populate_stats(site, [
build(:pageview, user_id: 1234, timestamp: ~N[2021-01-01 12:00:00], pathname: "/1"),
build(:pageview, user_id: 1234, timestamp: ~N[2021-01-01 12:00:05], pathname: "/2"),
build(:event,
name: "pageleave",
user_id: 1234,
timestamp: ~N[2021-01-01 12:01:00],
pathname: "/1"
)
])
conn =
get(conn, "/api/v1/stats/aggregate", %{
"site_id" => site.domain,
"metrics" => "time_on_page",
"filters" => "event:page==/2",
"period" => "day",
"date" => "2021-01-01"
})
assert json_response(conn, 200)["results"] == %{
"time_on_page" => %{"value" => nil}
}
end
test "conversion_rate when goal filter is applied", %{conn: conn, site: site} do
populate_stats(site, [
build(:event, name: "Signup"),

View File

@ -2604,6 +2604,35 @@ defmodule PlausibleWeb.Api.ExternalStatsController.BreakdownTest do
}
end
# Same scenario as a plain two-page visit followed by a pageleave: `/1` gets
# the 5 seconds between the two pageviews, while `/2` stays nil because the
# trailing pageleave must be ignored.
test "pageleave events are ignored when querying time on page", %{conn: conn, site: site} do
populate_stats(site, [
build(:pageview, user_id: 1234, timestamp: ~N[2021-01-01 12:00:00], pathname: "/1"),
build(:pageview, user_id: 1234, timestamp: ~N[2021-01-01 12:00:05], pathname: "/2"),
build(:event,
name: "pageleave",
user_id: 1234,
timestamp: ~N[2021-01-01 12:01:00],
pathname: "/1"
)
])
conn =
get(conn, "/api/v1/stats/breakdown", %{
"site_id" => site.domain,
"property" => "event:page",
"metrics" => "time_on_page",
"period" => "day",
"date" => "2021-01-01"
})
assert json_response(conn, 200) == %{
"results" => [
%{"page" => "/1", "time_on_page" => 5},
%{"page" => "/2", "time_on_page" => nil}
]
}
end
test "returns time_on_page as the only metric in an event:page breakdown", %{
conn: conn,
site: site

View File

@ -101,6 +101,46 @@ defmodule PlausibleWeb.Api.ExternalStatsController.QueryTest do
]
end
# Two events are stored (a pageview and a pageleave), but the `events` metric
# is expected to report only the pageview.
test "does not count pageleave events towards the events metric in a simple aggregate query",
%{conn: conn, site: site} do
populate_stats(site, [
build(:pageview, user_id: 234, timestamp: ~N[2021-01-01 00:00:00]),
build(:event, user_id: 234, name: "pageleave", timestamp: ~N[2021-01-01 00:00:01])
])
conn =
post(conn, "/api/v2/query", %{
"site_id" => site.domain,
"date_range" => "all",
"metrics" => ["events"]
})
assert json_response(conn, 200)["results"] == [
%{"metrics" => [1], "dimensions" => []}
]
end
# A single pageview followed 3 seconds later by a pageleave must still look
# like a bounced visit of zero duration: bounce rate 100, visit duration 0.
test "pageleave events do not affect bounce rate and visit duration", %{
conn: conn,
site: site
} do
populate_stats(site, [
build(:pageview, user_id: 123, timestamp: ~N[2021-01-01 00:00:00]),
build(:event, user_id: 123, name: "pageleave", timestamp: ~N[2021-01-01 00:00:03])
])
conn =
post(conn, "/api/v2/query", %{
"site_id" => site.domain,
"date_range" => "all",
"metrics" => ["bounce_rate", "visit_duration"]
})
assert json_response(conn, 200)["results"] == [
%{"metrics" => [100, 0], "dimensions" => []}
]
end
test "can filter by channel", %{conn: conn, site: site} do
populate_stats(site, [
build(:pageview,

View File

@ -26,7 +26,7 @@ function compilefile(input, output, templateVars = {}) {
}
}
const base_variants = ["hash", "outbound-links", "exclusions", "compat", "local", "manual", "file-downloads", "pageview-props", "tagged-events", "revenue"]
const base_variants = ["hash", "outbound-links", "exclusions", "compat", "local", "manual", "file-downloads", "pageview-props", "tagged-events", "revenue", "pageleave"]
const variants = [...g.clone.powerSet(base_variants)].filter(a => a.length > 0).map(a => a.sort());
compilefile(relPath('src/plausible.js'), relPath('../priv/tracker/js/plausible.js'))

View File

@ -10,6 +10,7 @@
var scriptEl = document.currentScript;
{{/if}}
var endpoint = scriptEl.getAttribute('data-api') || defaultEndpoint(scriptEl)
var dataDomain = scriptEl.getAttribute('data-domain')
function onIgnoredEvent(reason, options) {
if (reason) console.warn('Ignoring Event: ' + reason);
@ -27,6 +28,53 @@
{{/if}}
}
{{#if pageleave}}
// :NOTE: Tracking pageleave events is currently experimental.
// Multiple pageviews might be sent by the same script when the page
// uses client-side routing (e.g. hash or history-based). This flag
// prevents registering multiple listeners in those cases.
var listeningPageLeave = false
// In SPAs, multiple listeners that trigger the pageleave event
// might fire at nearly the same time. E.g. when navigating back
// in browser history while using hash-based routing, a popstate
// and a hashchange event are fired in very quick succession. This
// flag prevents sending multiple pageleaves in those cases.
var pageLeaveSending = false
// Sends a `pageleave` event for `url` to the configured endpoint.
// A call is ignored while `pageLeaveSending` is set, i.e. for 500ms after
// the previous call.
function triggerPageLeave(url) {
if (pageLeaveSending) {return}
pageLeaveSending = true
setTimeout(function () {pageLeaveSending = false}, 500)
var payload = {
n: 'pageleave',
d: dataDomain,
u: url,
}
{{#if hash}}
payload.h = 1
{{/if}}
// Nothing is sent when the browser does not provide `navigator.sendBeacon`.
if (navigator.sendBeacon) {
var blob = new Blob([JSON.stringify(payload)], { type: 'text/plain' });
navigator.sendBeacon(endpoint, blob)
}
}
// URL that the `pagehide` listener reports. It is refreshed on every call to
// `registerPageLeaveListener`, so after client-side navigation the listener
// reports the most recent URL instead of the first one it was given.
var pageLeaveUrl

// Remembers `url` as the page to report on `pagehide` and, on the first call
// only, registers the `pagehide` listener that sends the pageleave event.
function registerPageLeaveListener(url) {
  pageLeaveUrl = url

  if (listeningPageLeave) { return }

  window.addEventListener('pagehide', function () {
    triggerPageLeave(pageLeaveUrl)
  })

  listeningPageLeave = true
}
{{/if}}
function trigger(eventName, options) {
{{#unless local}}
@ -73,7 +121,7 @@
{{else}}
payload.u = location.href
{{/if}}
payload.d = scriptEl.getAttribute('data-domain')
payload.d = dataDomain
payload.r = document.referrer || null
if (options && options.meta) {
payload.m = JSON.stringify(options.meta)
@ -115,6 +163,11 @@
request.onreadystatechange = function() {
if (request.readyState === 4) {
{{#if pageleave}}
if (eventName === 'pageview') {
registerPageLeaveListener(payload.u)
}
{{/if}}
options && options.callback && options.callback({status: request.status})
}
}
@ -129,25 +182,41 @@
{{#unless manual}}
var lastPage;
function page() {
{{#if pageleave}}
// URL recorded when this code first runs and again after every call to
// `pageLeaveSPA`.
var lastUrl = location.href
// Sends a pageleave for the previously recorded URL, then records the
// current URL for the next call.
function pageLeaveSPA() {
triggerPageLeave(lastUrl);
lastUrl = location.href;
}
{{/if}}
// Sends a pageview for the current location.
// Outside the hash variant, nothing happens when the pathname equals the one
// of the last tracked page. In the pageleave variant, a truthy
// `isSPANavigation` makes the function send a pageleave for the previous URL
// before the new pageview.
function page(isSPANavigation) {
{{#unless hash}}
if (lastPage === location.pathname) return;
{{/unless}}
{{#if pageleave}}
if (isSPANavigation) {pageLeaveSPA()}
{{/if}}
lastPage = location.pathname
trigger('pageview')
}
var onSPANavigation = function() {page(true)}
{{#if hash}}
window.addEventListener('hashchange', page)
window.addEventListener('hashchange', onSPANavigation)
{{else}}
var his = window.history
if (his.pushState) {
var originalPushState = his['pushState']
his.pushState = function() {
originalPushState.apply(this, arguments)
page();
onSPANavigation();
}
window.addEventListener('popstate', page)
window.addEventListener('popstate', onSPANavigation)
}
{{/if}}