[ML] GridSearch / RandomSearch / HalvingGridSearch / HalvingRandomSearch
1. Data
import numpy as np
import tensorflow as tf # random seed for the weights (not actually used below)
from sklearn.datasets import load_iris
from sklearn.metrics import accuracy_score
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV
from sklearn.experimental import enable_halving_search_cv
from sklearn.model_selection import HalvingRandomSearchCV, HalvingGridSearchCV
from sklearn.model_selection import train_test_split, KFold, cross_val_score
#1. Data
datasets = load_iris()
x = datasets['data']
y = datasets.target
x_train, x_test, y_train, y_test = train_test_split(x, y,
train_size=0.8, shuffle=True, random_state=666)
n_splits = 5
kfold = KFold(n_splits=n_splits, shuffle=True, random_state=66)
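As a quick sanity check (a sketch not in the original post; cross_val_score is imported above but otherwise unused), you can score an untuned RandomForest to get a baseline for the tuned searches below:
# Sketch: baseline CV accuracy of an untuned RandomForest on the training split
from sklearn.ensemble import RandomForestClassifier # also imported in section 2
baseline = cross_val_score(RandomForestClassifier(random_state=66), # random_state chosen arbitrarily
x_train, y_train, cv=kfold)
print("baseline CV acc : ", baseline.mean())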
parameters = [
{'n_estimators' : [100,200,300,400,500], 'max_depth' : [6,10,12,14,16]},
{'max_depth' : [6, 8, 10, 12, 14], 'min_samples_leaf' : [3, 5, 7, 10, 12]},
{'min_samples_leaf' : [3, 5, 7, 10, 12], 'min_samples_split' : [2, 3, 5, 10, 12]},
{'min_samples_split' : [2, 3, 5, 10, 12]},
{'n_jobs' : [-1, 2, 4, 6],'min_samples_split' : [2, 3, 5, 10, 12]}
]
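Each dict in the list above is expanded into its own grid, so the full grid has 5*5 + 5*5 + 5*5 + 5 + 4*5 = 100 candidates; with 5-fold CV, GridSearchCV therefore fits 500 models. A minimal sketch (not in the original post) to verify the count with sklearn's ParameterGrid:
# Sketch: count the candidates GridSearchCV will enumerate
from sklearn.model_selection import ParameterGrid
print("candidates : ", len(ParameterGrid(parameters))) # 100
print("total fits : ", len(ParameterGrid(parameters)) * n_splits) # 500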
2. Model Construction (GridSearch / RandomSearch / HalvingGridSearch / HalvingRandomSearch)
#2. Model construction
from sklearn.ensemble import RandomForestClassifier # an ensemble of randomly built DecisionTreeClassifier trees
# model = SVC(C=1, kernel='linear', degree=3)
# Pick ONE of the four searches below. As originally written, each assignment
# overwrote the previous one, so only the last search would actually run.
#[model(1)]
# model = GridSearchCV(RandomForestClassifier(), parameters, cv=kfold, verbose=1,
#                      refit=True, n_jobs=1)
#[model(2)]
# model = RandomizedSearchCV(RandomForestClassifier(), parameters, cv=kfold, verbose=1,
#                            refit=True, n_jobs=1)
#[model(3)]
# model = HalvingGridSearchCV(RandomForestClassifier(), parameters, cv=kfold, verbose=1,
#                             refit=True, n_jobs=-1)
#[model(4)]
model = HalvingRandomSearchCV(RandomForestClassifier(), parameters, cv=kfold, verbose=1,
refit=True, n_jobs=-1)
# HalvingGridSearchCV and HalvingRandomSearchCV are still experimental, so the
# enable_halving_search_cv import shown at the top is required before they can be imported.
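Note (not stated in the original post): RandomizedSearchCV samples only n_iter candidates from the grid (default n_iter=10), and the halving searches start each round on a small training subset, discarding weak candidates before granting survivors more samples. This is why the four runtimes in the results below differ so much.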
3. Compile, Train
#3. Compile, train
import time
start = time.time()
model.fit(x_train, y_train)
end = time.time()
print("최적의 매개변수 : ", model.best_estimator_) # 가장 좋은 추정치
print("최적의 파라미터 : ", model.best_params_)
print("best_score_ : ", model.best_score_) # 정확도
print("model.score : ", model.score(x_test, y_test))
4. Evaluate, Predict
#4. Evaluate, predict
y_predict = model.predict(x_test)
print("accuracy_score : ", accuracy_score(y_test, y_predict))
y_pred_best = model.best_estimator_.predict(x_test)
print("최적 튠 ACC : ", accuracy_score(y_test,y_pred_best))
print("걸린시간 : ", round(end-start, 4))
# GridSearchCV
# Best estimator : RandomForestClassifier(min_samples_leaf=10, min_samples_split=5)
# Best params : {'min_samples_leaf': 10, 'min_samples_split': 5}
# best_score_ : 0.95
# model.score : 1.0
# accuracy_score : 1.0
# Best-tuned ACC : 1.0
# Elapsed time : 14.6592

# RandomizedSearchCV
# Best estimator : RandomForestClassifier(min_samples_leaf=12, min_samples_split=3)
# Best params : {'min_samples_split': 3, 'min_samples_leaf': 12}
# best_score_ : 0.95
# model.score : 1.0
# accuracy_score : 1.0
# Best-tuned ACC : 1.0
# Elapsed time : 2.3589

# HalvingGridSearchCV
# Best estimator : RandomForestClassifier(max_depth=8, min_samples_leaf=5)
# Best params : {'max_depth': 8, 'min_samples_leaf': 5}
# best_score_ : 0.9333333333333332
# model.score : 1.0
# accuracy_score : 1.0
# Best-tuned ACC : 1.0
# Elapsed time : 23.1831

# HalvingRandomSearchCV
# Best estimator : RandomForestClassifier(min_samples_split=5)
# Best params : {'min_samples_split': 5}
# best_score_ : 0.9333333333333333
# model.score : 1.0
# accuracy_score : 1.0
# Best-tuned ACC : 1.0
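Summary of the runs above: all four searches reach test accuracy 1.0 on iris. RandomizedSearchCV is by far the fastest (2.36 s vs 14.66 s for the full grid) because it tries only 10 of the 100 candidates, while HalvingGridSearchCV is actually the slowest here (23.18 s): on a dataset as small as iris, the overhead of the halving rounds outweighs the savings.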