Privacy options

Filtering with a threshold

CSeries.with_threshold()

import crandas as cd
import pytest  # required for pytest.raises below; the snippet is not runnable without it

"""Test setting a threshold on the output size of a filtering"""
# Exactly five of the ten values below are smaller than 5, so the filter
# `cdf["a"] < 5` selects five rows.
cdf = cd.DataFrame({"a": [5, 4, 7, 8, 2, 1, 0, 9, 3, 6]})

# Option 1: using CSeries.with_threshold

# Thresholds of 4 and 5 are met by the five matching rows.
assert len(cdf[(cdf["a"] < 5).with_threshold(4)]) == 5
assert len(cdf[(cdf["a"] < 5).with_threshold(5)]) == 5
# A threshold of 6 exceeds the number of matching rows, so the server is
# expected to reject the filtering.
with pytest.raises(cd.errors.ServerError) as exc:
    cdf[(cdf["a"] < 5).with_threshold(6)]
# NOTE(review): str(exc) stringifies pytest's ExceptionInfo wrapper;
# str(exc.value) is the documented way to read the message — confirm
# before switching.
assert "threshold" in str(exc)

# Option 2: using threshold parameter of CDataFrame.filter

assert len(cdf.filter(cdf["a"] < 5, threshold=4)) == 5
assert len(cdf.filter(cdf["a"] < 5, threshold=5)) == 5
# Only the call belongs inside the context manager: it is expected to raise,
# so asserting on its result there would never be evaluated.
with pytest.raises(cd.errors.ServerError) as exc:
    cdf.filter(cdf["a"] < 5, threshold=6)
assert "threshold" in str(exc)

Column-wise aggregation functions accept a threshold parameter:

CSeries.sum(), CSeries.sum_squares(), CSeries.mean(), CSeries.var(), CSeries.count(), CSeries.max(), CSeries.min()

import crandas as cd
import pandas as pd
from crandas.errors import ServerError, ServerErrorCode
import numpy
import random
import pytest
from pathlib import Path


"""Tests whether the threshold parameter works as expected by attempting to
perform an invalid operation"""
n = 100
# Fixed seed so the randomly generated "value" column is reproducible.
numpy.random.seed(1)
keys = range(n)
values = numpy.random.randint(0, 100, size=n)
table = cd.DataFrame({"key": keys, "value": values})

# Keys are the distinct integers 0..n-1, so this condition matches one row.
filtered = table["key"] == 5

# Summing over that single-row selection with threshold=5 is expected to be
# rejected by the server.
with pytest.raises(ServerError) as exc:
    selected_rows = table[filtered]
    selected_rows["value"].sum(threshold=5)
assert "threshold" in str(exc)