In [ ]:
# Load the events frame; it is reused below both as the query input
# (uid + timestamp columns) and as the target passed to the train/test split.
labels = get_fraud_events_dataframe()

chalk_client = ChalkClient()

# One row per event: the user id and the event timestamp.
# `timestamp` is a bare name here, so it must already be defined in the session.
query_input = labels[[User.uid, timestamp]]

# Features requested for every input row.
fraud_features = [
    User.returned_transactions_last_60,
    User.user_account_name_match_score,
    User.socure_score,
    User.identity.has_verified_phone,
    User.identity.is_voip_phone,
    User.identity.account_age_days,
    User.identity.email_age,
]

X = chalk_client.offline_query(input=query_input, output=fraud_features)
In [ ]:
# xgboost train / predict
# Hold out 30% of the rows for evaluation. The split is seeded so that
# re-running the cell reproduces the same partition, which keeps the fitted
# model and its predictions comparable between runs.
# NOTE(review): `labels` is passed whole as the target; if it holds columns
# other than the fraud label (e.g. the uid/timestamp columns used to build
# X), select just the label column here -- confirm.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    labels,
    test_size=0.3,
    random_state=42,
)
# NOTE(review): `use_label_encoder` is deprecated in recent XGBoost
# releases -- confirm against the pinned version before removing it.
xgb = XGBClassifier(
    eval_metric="logloss",
    use_label_encoder=False,
)
xgb.fit(X_train, y_train)
y_pred = xgb.predict(X_test)