Script 339: Campaign Benchmark Performance

Purpose

Tag dimensions based on CVR, CPL, CPC, and CTR performance compared to the previous 6 months.

To Elaborate

The Python script tags dimensions (campaigns) based on their performance in terms of CVR (Conversion Rate), CPL (Cost per Conversion, named CPA in the code), CPC (Cost per Click), and CTR (Click Through Rate). For each metric, the script compares the campaign's value to a benchmark value and assigns a performance tag (Over Target, Under Target, or On Target) depending on whether the relative difference from the benchmark exceeds the campaign's High or Low criteria margin. When a campaign has no benchmark value, the benchmark falls back to the median of the respective metric over the previous 6 months. The script also cleans comma-formatted and percentage values and converts them to numeric values.

Walking Through the Code

The script defines column names for output and input data.
Empty output columns are set up in the output dataframe.
The script defines a function clean_and_convert_to_numeric to clean and convert values to numeric format, handling both numeric and percentage values.
The script defines a function get_col_set_tuple to construct a tuple of the Marin dimension column names for a metric, based on a naming convention.
The script defines a function tag_by_performance to tag dimensions based on their performance compared to a benchmark value.
The function tag_by_performance is called for each of the 4 metrics (CVR, CPL, CPC, CTR) to tag dimensions in the output dataframe.
The script combines the changes in performance tags for all metrics.
Debug information is printed for campaigns with changed CVR performance tags.
The output dataframe is filtered to include only campaigns with changed performance tags.

Vitals

Script ID : 339
Client ID / Customer ID: 1306923689 / 60269255
Action Type: Bulk Upload
Item Changed: Campaign
Output Columns: Account, Campaign, CPC $ - Performance, Cost/Conv $ - Performance, CTR % - Performance, Conv Rate % - Performance
Linked Datasource: M1 Report
Reference Datasource: None
Owner: ascott@marinsoftware.com (ascott@marinsoftware.com)
Created by ascott@marinsoftware.com on 2023-10-11 17:58
Last Updated by ascott@marinsoftware.com on 2023-12-06 04:01

> See it in Action

Python Code

# --- Column names -----------------------------------------------------------

# Output-only columns (debug information).
OUT_COL_MD_DEBUG_PERF_CALC = 'Performance Calc Raw Numbers'
OUT_COL_MD_DEBUG_PERF_CALC_LAST_RUN = 'Performance Calc Last Run Date'

# Input-only columns holding the raw metric values.
IN_COL_METRIC_CVR = 'Conv. Rate %'
IN_COL_METRIC_CPA = 'Cost/Conv. $'
IN_COL_METRIC_CTR = 'CTR %'
IN_COL_METRIC_CPC = 'Avg. CPC $'

# Columns that are read from the input and written to the output.
IN_OUT_COL_CAMPAIGN = 'Campaign'
IN_OUT_COL_MD_PERF_CVR = 'Conv Rate % - Performance'
IN_OUT_COL_MD_PERF_CPA = 'Cost/Conv $ - Performance'
IN_OUT_COL_MD_PERF_CTR = 'CTR % - Performance'
IN_OUT_COL_MD_PERF_CPC = 'CPC $ - Performance'

# --- Output frame initialisation ---------------------------------------------

# Every output column starts out empty (NaN); the performance tags are filled
# in later. Populating the last-run date column is currently disabled:
#outputDf[OUT_COL_MD_DEBUG_PERF_CALC_LAST_RUN] = datetime.date.today().isoformat()
for _empty_out_col in (
    OUT_COL_MD_DEBUG_PERF_CALC,
    IN_OUT_COL_MD_PERF_CVR,
    IN_OUT_COL_MD_PERF_CPA,
    IN_OUT_COL_MD_PERF_CTR,
    IN_OUT_COL_MD_PERF_CPC,
):
    outputDf[_empty_out_col] = np.nan
del _empty_out_col  # do not leave the loop variable behind in the script namespace

# Ensure that columns with percentage values (e.g., 'Conv. Rate %', 'CTR %') are properly cleaned and converted to numeric
# We need to handle both numeric and percentage values
def clean_and_convert_to_numeric(value):
    """Convert a raw report cell into a float.

    Strings may contain thousands separators (``'1,000'``), surrounding
    whitespace and a trailing percent sign; a percentage is returned as a
    fraction (``'12.5%'`` -> ``0.125``).

    Args:
        value: The cell value. Strings are parsed, Python and NumPy numbers
            are cast to float, anything else is treated as missing.

    Returns:
        The numeric value as a float, or ``np.nan`` when the value cannot be
        interpreted as a number.
    """
    if isinstance(value, str):
        # Drop thousands separators and padding before parsing.
        cleaned_value = value.replace(',', '').strip()
        is_percentage = cleaned_value.endswith('%')
        if is_percentage:
            cleaned_value = cleaned_value.rstrip('%')
        try:
            numeric_value = float(cleaned_value)
        except ValueError:
            # Covers both plain strings and malformed percentages such as
            # 'abc%' or '%', which would otherwise abort the whole script.
            print(f"Unable to convert {value} to numeric, setting to np.nan")
            return np.nan
        if is_percentage:
            numeric_value = numeric_value / 100  # percentage -> fraction
        print(f"Converted {value} to {numeric_value}")
        return numeric_value

    # np.integer is not a subclass of int, so NumPy scalars are listed explicitly.
    if isinstance(value, (float, int, np.integer, np.floating)):
        return float(value)

    return np.nan



# constructs tuple of 4 column names with names of marin dimensions columns based on convention used
def get_col_set_tuple(col_name):
    """Build the 4-tuple of column names associated with a metric column.

    The first element is ``col_name`` itself. The remaining three are the
    benchmark, high-criteria and low-criteria column names, formed by removing
    every '.' from ``col_name`` and appending a fixed suffix.

    Args:
        col_name: Name of the metric column, e.g. ``'Conv. Rate %'``.

    Returns:
        Tuple ``(metric, benchmark, high criteria, low criteria)`` of column names.
    """
    base_name = ''.join(col_name.split('.'))
    suffixes = (
        ' - Avg - Benchmark',
        ' - High - Criteria',
        ' - Low - Criteria',
    )
    return (col_name, *(base_name + suffix for suffix in suffixes))


# set col_tag column on outDf with performance tag by comparing col_metric with col_benchmark
# returns array of boolean indicating changed rows in inDf
def tag_by_performance(inDf, outDf, col_set_tuple, col_tag):
    """Tag every row of ``inDf`` by comparing a metric with its benchmark.

    The relative performance is ``metric / benchmark``. A row is tagged
    'Over Target' when the ratio exceeds 1 by more than the over margin,
    'Under Target' when it falls short of 1 by more than the under margin,
    and 'On Target' otherwise. Rows whose comparison is undefined (missing or
    non-positive benchmark, or missing metric) are left untagged.

    Side effects: the four columns named in ``col_set_tuple`` are converted to
    numeric values in ``inDf``, missing benchmarks are filled with the median
    of the metric column, two helper columns (``col_tag + "_new"`` and
    ``col_tag + "_perf_ratio"``) are added to ``inDf``, and ``outDf[col_tag]``
    is set to the new tags.

    Args:
        inDf: Input dataframe; must contain ``col_tag`` and the four columns
            named in ``col_set_tuple``.
        outDf: Output dataframe that receives the new tags in ``col_tag``.
        col_set_tuple: ``(metric, benchmark, over margin, under margin)``
            column names.
        col_tag: Name of the performance tag column.

    Returns:
        Boolean Series marking the rows of ``inDf`` whose tag was determined
        and differs from the existing value in ``col_tag``.
    """
    (col_metric, col_benchmark, col_over_margin, col_under_margin) = col_set_tuple

    # temporary columns for interim values
    col_tag_name_new_value = col_tag + "_new"
    col_perf_ratio = col_tag + "_perf_ratio"

    # Object dtype so that string tags can be written into the column without
    # an implicit float -> object conversion.
    inDf[col_tag_name_new_value] = np.nan
    inDf[col_tag_name_new_value] = inDf[col_tag_name_new_value].astype(object)

    # Clean percentage / comma-formatted values and convert them to numeric.
    for col in (col_metric, col_benchmark, col_over_margin, col_under_margin):
        inDf[col] = inDf[col].apply(clean_and_convert_to_numeric)

    # Fall back to the median of the metric where no benchmark is provided.
    # The result is assigned back: a dict passed to Series.fillna is keyed by
    # index label, and an in-place fill on a column selection is not
    # guaranteed to reach the parent dataframe.
    median_metric = inDf[col_metric].median()
    print(col_metric, "median", median_metric)
    inDf[col_benchmark] = inDf[col_benchmark].fillna(median_metric)

    # relative performance compared to the benchmark
    inDf[col_perf_ratio] = inDf[col_metric] / inDf[col_benchmark]
    perf_ratio = inDf[col_perf_ratio]

    # Rows without a usable benchmark; NaN must be detected with isna()
    # because `== np.nan` is always False.
    no_benchmark = inDf[col_benchmark].isna() | (inDf[col_benchmark] <= 0.0)
    # Only rows with a defined ratio can be classified at all.
    comparable = ~no_benchmark & perf_ratio.notna()

    over_perf = comparable & ((perf_ratio - 1.0) > inDf[col_over_margin])
    under_perf = comparable & ((1.0 - perf_ratio) > inDf[col_under_margin])
    on_target = comparable & ~(over_perf | under_perf)

    # Assignment order matters if a row ever satisfies both over and under.
    inDf.loc[over_perf, col_tag_name_new_value] = 'Over Target'
    inDf.loc[under_perf, col_tag_name_new_value] = 'Under Target'
    inDf.loc[on_target, col_tag_name_new_value] = 'On Target'

    # rows that received a tag which differs from the existing one
    changed = inDf[col_tag_name_new_value].notnull() & (inDf[col_tag] != inDf[col_tag_name_new_value])

    outDf[col_tag] = inDf[col_tag_name_new_value]

    return changed


# Tag each of the 4 metrics in turn. Each call returns a boolean mask of the
# campaigns whose tag for that metric changed.

cvr_col_set = get_col_set_tuple(IN_COL_METRIC_CVR)
# The CPC column names are spelled out because they are based on 'CPC $'
# rather than on the input metric name 'Avg. CPC $'.
cpc_col_set = (
    IN_COL_METRIC_CPC,
    'CPC $ - Avg - Benchmark',
    'CPC $ - High - Criteria',
    'CPC $ - Low - Criteria',
)

changed1 = tag_by_performance(inputDf, outputDf, cvr_col_set, IN_OUT_COL_MD_PERF_CVR)
changed2 = tag_by_performance(
    inputDf, outputDf, get_col_set_tuple(IN_COL_METRIC_CPA), IN_OUT_COL_MD_PERF_CPA
)
changed3 = tag_by_performance(
    inputDf, outputDf, get_col_set_tuple(IN_COL_METRIC_CTR), IN_OUT_COL_MD_PERF_CTR
)
changed4 = tag_by_performance(inputDf, outputDf, cpc_col_set, IN_OUT_COL_MD_PERF_CPC)

# A campaign counts as changed when the tag of any metric changed.
combined_changes = changed1 | changed2 | changed3 | changed4

# Debug output: input values next to the new tag for campaigns whose CVR tag changed.
print("== Campaigns with CVR Perf Tag changes ==")
in_cols = [IN_OUT_COL_CAMPAIGN, IN_OUT_COL_MD_PERF_CVR, *cvr_col_set]
out_cols = [IN_OUT_COL_CAMPAIGN, IN_OUT_COL_MD_PERF_CVR]
inDf_changed = inputDf.loc[changed1, in_cols]
outDf_changed = outputDf.loc[changed1, out_cols]
debug_df = inDf_changed.join(outDf_changed, rsuffix="_out")
print(debug_df.to_string())

# Keep only the campaigns with at least one changed tag for the bulk file.
outputDf = outputDf[combined_changes]

Post generated on 2024-05-15 07:44:05 GMT

11 Oct 2023

« Script 337: Budget Alert Script 341: MediaType Autotagging »

MarinOne Scripts Creator's Corner