# Load the labelled fraud events; this frame drives both the feature query
# below and, later in the script, the train/test split.
labels = get_fraud_events_dataframe()

chalk_client = ChalkClient()

# Two columns of the labels frame are forwarded as the query input.
# NOTE(review): `timestamp` is not bound anywhere in this snippet -- confirm
# it is defined earlier in the file.
query_input = labels[[User.uid, timestamp]]

# Features requested back from the offline query.
feature_outputs = [
    User.returned_transactions_last_60,
    User.user_account_name_match_score,
    User.socure_score,
    User.identity.has_verified_phone,
    User.identity.is_voip_phone,
    User.identity.account_age_days,
    User.identity.email_age,
]

# NOTE(review): `X` is handed straight to train_test_split further down, so
# it is presumably frame-like -- confirm what offline_query returns.
X = chalk_client.offline_query(input=query_input, output=feature_outputs)
# xgboost train / predict

# Split features and labels into train and test partitions with
# test_size=0.3 (assuming scikit-learn's train_test_split, that reserves
# 30% of the rows for testing).
# NOTE(review): the entire `labels` frame is passed as the target, including
# the columns that were used as the query input -- confirm this is intended
# rather than a single label column.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    labels,
    test_size=0.3,
)

# Keyword arguments for the classifier, gathered in one place.
xgb_params = {
    "eval_metric": "logloss",
    "use_label_encoder": False,
}
xgb = XGBClassifier(**xgb_params)

# Fit on the training partition, then predict on the held-out rows.
xgb.fit(X_train, y_train)
y_pred = xgb.predict(X_test)