import polars as pl
import random
# Seed the RNG so the sample data is reproducible across runs.
random.seed(21)

# Build a 50-row demo frame: a categorical colour and a small integer value.
data = pl.DataFrame(
    {
        "colour": random.choices(["Red", "Blue", "Yellow"], k=50),
        "value": random.choices(range(1, 20), k=50),
    },
    schema={"colour": pl.Categorical, "value": pl.Int16},
)
print(data)
Custom function in Polars
As someone who works across data science and operations research, I spend a lot of time wrangling large datasets—sometimes messy, sometimes massive, often both. For years, pandas was my go-to tool, but since I found Polars, I haven’t looked back. Okay, fine, I did look back once or twice, but those don’t count.
My example will be calculating the percentage of the total, formatted as a percentage.
- Input is a list of numbers:
[100, 100, 300]
- Output is a list of percentages:
[20%, 20%, 60%]
Expression
In Polars, an expression is a lazy representation of a data transformation. Expressions are modular and flexible, which means you can use them as building blocks to build more complex expressions (Polars 2025).
In this super simple example, I want to add one to the value
column, then multiply by ten.
Approach 1: Use Polars built-in function
# Two equivalent spellings of (value + 1) * 10 using built-in expressions:
# method chaining (.add/.mul) and Python arithmetic operators.
print(
    data.with_columns(
        pl.col("value").add(1).mul(10).alias("result"),
        ((pl.col("value") + 1) * 10).alias("result2"),
    )
)
Approach 2: Python custom function
def add_one_multiply_ten(input_num: int) -> int:
    """Return ``(input_num + 1) * 10``.

    Args:
        input_num: The number to transform.

    Returns:
        The input incremented by one, then multiplied by ten.
    """
    return (input_num + 1) * 10
# Apply the plain-Python function to each element of the column.
# return_dtype is given explicitly so the output column is Int16.
print(
    data.with_columns(
        pl.col("value")
        .map_elements(add_one_multiply_ten, return_dtype=pl.Int16)
        .alias("result")
    )
)
Approach 3: Polars custom function
Here I created 2 custom functions to achieve the same result. The first one uses Polars built-in functions, and the second one uses UDF from approach 2. See the Polars documentation on Extending the API for other examples.
@pl.api.register_expr_namespace("me")
class Me:
    """Custom expression namespace, reachable as ``<expr>.me``.

    Every method returns a ``pl.Expr``, so calls can be chained with other
    expression methods.
    """

    def __init__(self, expr: pl.Expr) -> None:
        # Keep the expression this namespace instance was accessed from.
        self._expr = expr

    def add_one_mul_ten(self) -> pl.Expr:
        """Return the wrapped expression plus one, then times ten."""
        return self._expr.add(1).mul(10)

    def add_one(self) -> pl.Expr:
        """Return the wrapped expression plus one."""
        return self._expr.add(1)

    def mul_ten(self) -> pl.Expr:
        """Return the wrapped expression times ten."""
        return self._expr.mul(10)
The best part about this approach is that I can chain the custom functions to all expressions!
# The custom namespace methods chain with each other and with built-in
# expression methods, before or after them.
print(
    data.with_columns(
        pl.col("value").me.add_one_mul_ten().alias("udf"),
        pl.col("value").me.add_one().me.mul_ten().alias("chain_udfs"),
        pl.col("value").max().me.add_one_mul_ten().alias("max_then_udf"),
        pl.col("value").me.add_one_mul_ten().truediv(50).ceil().alias("udf_then_func"),
    )
)
Additionally, I can apply the function to group by as well.
# The same namespace methods applied to per-colour aggregates.
print(
    data.group_by("colour").agg(
        pl.col("value").mean().alias("avg"),
        pl.col("value").mean().me.add_one_mul_ten().alias("avg_then_udf"),
        pl.col("value").mean().me.add_one_mul_ten().round().alias("avg_then_udf_round"),
    )
)
Series
I rarely use Series.
def spongebob_case(input_txt: str) -> str:
    """Return *input_txt* in alternating case, starting with lowercase.

    Characters at even indices are lowercased and characters at odd indices
    are uppercased. Every character counts towards the index, including
    spaces and punctuation.

    Args:
        input_txt: The text to transform.

    Returns:
        The transformed text; an empty string for empty input.
    """
    return "".join(
        char.lower() if index % 2 == 0 else char.upper()
        for index, char in enumerate(input_txt)
    )