Script 423: Campaign Benchmark Performance
Purpose
Tag Dimensions based on CVR, CPL, CPC, CTR Performance compared to previous 6 months.
To Elaborate
The Python script solves the problem of tagging campaign dimensions based on their performance metrics (CVR, CPL, CPC, CTR) compared to the benchmarks set in the previous 6 months. The script calculates the performance ratio of each metric to its benchmark and assigns a performance tag (Over Target, Under Target, On Target) based on predefined criteria. The script also handles cases where the benchmark is not defined or is zero.
Walking Through the Code
- Define the column names for output and input data.
- Set up empty output columns in the output dataframe.
- Define a function
clean_and_convert_to_numeric
to clean and convert values to numeric, handling both numeric and percentage values. - Define a function
get_col_set_tuple
to construct a tuple of column names for marin dimensions based on a convention. - Define a function
tag_by_performance
to tag dimensions based on performance metrics and benchmarks.- Clean and convert the metric and benchmark columns to numeric values.
- Calculate the benchmark if none is provided.
- Calculate the performance ratio of the metric to the benchmark.
- Separate the dimensions into different performance groups (Over Target, Under Target, On Target) based on predefined criteria.
- Update the tag column in the output dataframe with the new performance tags.
- Return an array of boolean values indicating the changed rows in the input dataframe.
- Call the
tag_by_performance
function for each of the 4 metrics (CVR, CPL, CPC, CTR). - Combine the changes from all metrics into a single array.
- Print the campaigns with changed CVR performance tags for debugging purposes.
- Update the output dataframe to only include campaigns with changed performance tags.
Vitals
- Script ID : 423
- Client ID / Customer ID: 1306925163 / 60269255
- Action Type: Bulk Upload
- Item Changed: Campaign
- Output Columns: Account, Campaign, Conv Rate % - Performance, Cost/Conv $ - Performance, CPC $ - Performance, CTR % - Performance
- Linked Datasource: M1 Report
- Reference Datasource: None
- Owner: ascott@marinsoftware.com (ascott@marinsoftware.com)
- Created by ascott@marinsoftware.com on 2023-10-25 18:24
- Last Updated by ascott@marinsoftware.com on 2023-12-06 04:01
> See it in Action
Python Code
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
## columns for output
OUT_COL_MD_DEBUG_PERF_CALC = 'Performance Calc Raw Numbers'
OUT_COL_MD_DEBUG_PERF_CALC_LAST_RUN = 'Performance Calc Last Run Date'
# columns for input
IN_COL_METRIC_CVR = 'Conv. Rate %'
IN_COL_METRIC_CPA = 'Cost/Conv. $'
IN_COL_METRIC_CTR = 'CTR %'
IN_COL_METRIC_CPC = 'Avg. CPC $'
# columns for both input and output
IN_OUT_COL_CAMPAIGN = 'Campaign'
IN_OUT_COL_MD_PERF_CVR = 'Conv Rate % - Performance'
IN_OUT_COL_MD_PERF_CPA = 'Cost/Conv $ - Performance'
IN_OUT_COL_MD_PERF_CTR = 'CTR % - Performance'
IN_OUT_COL_MD_PERF_CPC = 'CPC $ - Performance'
# setup empty output columns
outputDf[OUT_COL_MD_DEBUG_PERF_CALC] = np.nan
#outputDf[OUT_COL_MD_DEBUG_PERF_CALC_LAST_RUN] = datetime.date.today().isoformat()
outputDf[IN_OUT_COL_MD_PERF_CVR] = np.nan
outputDf[IN_OUT_COL_MD_PERF_CPA] = np.nan
outputDf[IN_OUT_COL_MD_PERF_CTR] = np.nan
outputDf[IN_OUT_COL_MD_PERF_CPC] = np.nan
# Ensure that columns with percentage values (e.g., 'Conv. Rate %', 'CTR %') are properly cleaned and converted to numeric
# We need to handle both numeric and percentage values
def clean_and_convert_to_numeric(value):
if isinstance(value, str):
cleaned_value = value.replace(',', '') # Remove commas (e.g., for numbers like '1,000')
if cleaned_value.endswith('%'):
numeric_value = float(cleaned_value.rstrip('%')) / 100 # Convert percentage to float
print(f"Converted {value} to {numeric_value}")
return numeric_value
else:
try:
numeric_value = float(cleaned_value)
print(f"Converted {value} to {numeric_value}")
return numeric_value
except ValueError:
print(f"Unable to convert {value} to numeric, setting to np.nan")
return np.nan
elif isinstance(value, (float, int)):
return float(value)
else:
return np.nan
# constructs tuple of 4 column names with names of marin dimensions columns based on convention used
def get_col_set_tuple(col_name):
col_name_clean = col_name.replace('.','')
return (
col_name,
col_name_clean + ' - Avg - Benchmark',
col_name_clean + ' - High - Criteria',
col_name_clean + ' - Low - Criteria'
)
# set col_tag column on outDf with performance tag by comparing col_metric with col_benchmark
# returns array of boolean indicating changed rows in inDf
def tag_by_performance(inDf, outDf, col_set_tuple, col_tag):
(col_metric, col_benchmark, col_over_margin, col_under_margin) = col_set_tuple
# setup tmp columns for intrim values
col_tag_name_new_value = col_tag + "_new"
col_perf_ratio = col_tag + "_perf_ratio"
inDf[col_tag_name_new_value] = np.nan
# Ensure that columns with percentage values (e.g., 'Conv. Rate %', 'CTR %') are properly cleaned and converted to numeric
inDf[col_metric] = inDf[col_metric].apply(clean_and_convert_to_numeric)
inDf[col_benchmark] = inDf[col_benchmark].apply(clean_and_convert_to_numeric)
inDf[col_over_margin] = inDf[col_over_margin].apply(clean_and_convert_to_numeric)
inDf[col_under_margin] = inDf[col_under_margin].apply(clean_and_convert_to_numeric)
# calculate benchmark if none provided
median_metric = inputDf[col_metric].median()
print(col_metric, "median", median_metric)
inDf[col_benchmark].fillna({col_benchmark: median_metric}, inplace=True)
# calc relative performance to benchmark
inDf[col_perf_ratio] = inputDf[col_metric] / inputDf[col_benchmark]
# separate into different performance groups
array_benchmark_not_defined = inDf[col_benchmark] == np.nan
array_benchmark_zero = inDf[col_benchmark] <= 0.0
array_no_benchmark = np.logical_or(array_benchmark_not_defined, array_benchmark_zero)
array_over_perf = (inDf[col_perf_ratio] - 1.0) > inDf[col_over_margin]
array_under_perf = (1.0 - inDf[col_perf_ratio]) > inDf[col_under_margin]
array_on_target = np.logical_not(np.logical_or(array_no_benchmark, np.logical_or(array_over_perf, array_under_perf)))
inDf.loc[ array_over_perf, col_tag_name_new_value ] = 'Over Target'
inDf.loc[ array_under_perf, col_tag_name_new_value ] = 'Under Target'
inDf.loc[ array_on_target, col_tag_name_new_value ] = 'On Target'
#inDf.loc[ array_no_benchmark, col_tag_name_new_value ] = 'No Benchmark'
# find changed perf tags
changed = inDf[col_tag_name_new_value].notnull() & (inDf[col_tag] != inDf[col_tag_name_new_value])
outDf[col_tag] = inDf[col_tag_name_new_value]
return changed
# tag by performance for each of the 4 metrics
changed1 = tag_by_performance(
inputDf,
outputDf,
get_col_set_tuple(IN_COL_METRIC_CVR),
IN_OUT_COL_MD_PERF_CVR)
changed2 = tag_by_performance(
inputDf,
outputDf,
get_col_set_tuple(IN_COL_METRIC_CPA),
IN_OUT_COL_MD_PERF_CPA)
changed3 = tag_by_performance(
inputDf,
outputDf,
get_col_set_tuple(IN_COL_METRIC_CTR),
IN_OUT_COL_MD_PERF_CTR)
changed4 = tag_by_performance(
inputDf,
outputDf,
(IN_COL_METRIC_CPC,
'CPC $ - Avg - Benchmark',
'CPC $ - High - Criteria',
'CPC $ - Low - Criteria'
),
IN_OUT_COL_MD_PERF_CPC)
combined_changes = np.logical_or(changed4, np.logical_or(changed3, np.logical_or(changed1, changed2)))
# debug print what's changed
print("== Campaigns with CVR Perf Tag changes ==")
in_cols = [IN_OUT_COL_CAMPAIGN, IN_OUT_COL_MD_PERF_CVR] + list(get_col_set_tuple(IN_COL_METRIC_CVR))
out_cols = [IN_OUT_COL_CAMPAIGN, IN_OUT_COL_MD_PERF_CVR]
inDf_changed = inputDf.loc[changed1, in_cols]
outDf_changed = outputDf.loc[changed1, out_cols]
debug_df = inDf_changed.join(outDf_changed, rsuffix="_out")
print(debug_df.to_string())
# only include campaigns with changed perf tag in bulk file
outputDf = outputDf[ combined_changes ]
Post generated on 2024-05-15 07:44:05 GMT