Closed
Description
Environment: conda, xgboost 1.2.0, RAPIDS 0.14, Ubuntu 18.04 LTS, GeForce RTX 2080, CUDA 10.0.
While the non-sklearn API can take cudf, the sklearn API fails with the error in the title.
Any dataset works; I just chose creditcard.csv. To get the target column used below, use the attached dataset:
creditcard.csv.zip
i.e. this works:
# Working case: cudf inputs passed to the native (non-sklearn) xgboost API.
import cudf
import pynvml
import numpy as np
import pandas as pd
import xgboost as xgb
from math import sqrt
from sklearn.model_selection import KFold, StratifiedKFold, GroupKFold
from sklearn.metrics import mean_squared_error,f1_score, accuracy_score
# The same CSV is read twice with cudf: once as training data, once as the
# data to predict on.
train = cudf.read_csv("creditcard.csv")
test = cudf.read_csv("creditcard.csv")
# Pull out the target column, then drop it from both feature frames.
y = train['default payment next month']
train = train.drop(['default payment next month'], axis=1)
test = test.drop(['default payment next month'], axis=1)
# The split comes from cuml rather than from sklearn.model_selection.
from cuml import train_test_split
X_train, X_test, y_train, y_test = train_test_split(train, y, test_size=0.2, random_state=123)
# Wrap the splits in DMatrix objects for the native training API.
dtrain = xgb.DMatrix(X_train,y_train)
dval = xgb.DMatrix(X_test, y_test)
dtest = xgb.DMatrix(test)
# Datasets to evaluate during training, each paired with a display name.
evallist = [(dval, 'validation'), (dtrain, 'train')]
num_round=5000
# Native-API training call; the reporter states this path works.
trained_model = xgb.train(
{
'learning_rate': 0.01,
'colsample_bytree' : 0.25,
'max_depth': 10,
'objective': 'binary:logistic',
'silent': True,
'tree_method':'gpu_hist',
},
dtrain,num_round, evallist,verbose_eval=250)
prediction = trained_model.predict(dtest)
but this does not work:
# Failing case: the same cudf inputs passed to the sklearn-style XGBClassifier.
import cudf
import pynvml
import numpy as np
import pandas as pd
import xgboost as xgb
from math import sqrt
from sklearn.model_selection import KFold, StratifiedKFold, GroupKFold
from sklearn.metrics import mean_squared_error,f1_score, accuracy_score
# The same CSV is read twice with cudf: once as training data, once as the
# data to predict on.
train = cudf.read_csv("creditcard.csv")
test = cudf.read_csv("creditcard.csv")
# Pull out the target column, then drop it from both feature frames.
y = train['default payment next month']
train = train.drop(['default payment next month'], axis=1)
test = test.drop(['default payment next month'], axis=1)
# The split comes from cuml rather than from sklearn.model_selection.
from cuml import train_test_split
X_train, X_test, y_train, y_test = train_test_split(train, y, test_size=0.2, random_state=123)
# All settings are handed to the XGBClassifier constructor as keyword arguments.
params = {'learning_rate': 0.01, 'colsample_bytree' : 0.25, 'max_depth': 10, 'silent': True, 'tree_method':'gpu_hist','n_estimators': 5000, 'verbose_eval': 250}
model = xgb.XGBClassifier(**params)
# Evaluation pairs are given as (features, labels) tuples of cudf objects.
eval_set = [(X_test, y_test), (X_train, y_train)]
# This fit() call is the one that raises the reported
# "TypeError: 'property' object is not callable".
model.fit(X_train, y_train, eval_set=eval_set)
prediction = model.predict(test)
fails with:
>>> model.fit(X_train, y_train, eval_set=eval_set)
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/home/jon/miniconda3/lib/python3.6/site-packages/xgboost/sklearn.py", line 858, in fit
self._le = XGBoostLabelEncoder().fit(y)
File "/home/jon/miniconda3/lib/python3.6/site-packages/sklearn/preprocessing/label.py", line 219, in fit
y = column_or_1d(y, warn=True)
File "/home/jon/miniconda3/lib/python3.6/site-packages/sklearn/utils/validation.py", line 749, in column_or_1d
shape = np.shape(y)
File "<__array_function__ internals>", line 6, in shape
File "/home/jon/miniconda3/lib/python3.6/site-packages/cudf/core/series.py", line 734, in __array_function__
return cudf_func(*args, **kwargs)
TypeError: 'property' object is not callable
>>>
I expect I'm doing something wrong since cudf functionality has been in xgboost for a long time now and I assume many people use the sklearn API, but I cannot see what I'm doing wrong. Thanks!
Metadata
Metadata
Assignees
Labels
No labels