# set_h2o_cluster("10.111.32.12", 54321): IP of any node in the external h2o cluster.
# set_client_ip("10.111.32.16"): the IP for h2o on the client side; it may be the same host.
# Configuration 1: external cluster mode with a manually started cluster,
# pointing at the cluster/client addresses above, cloud name "test".
conf = H2OConf(ss).set_external_cluster_mode().use_manual_cluster_start().set_h2o_cluster("10.111.32.12",54321).set_client_ip("10.111.32.16").set_cloud_name("test")
# Request the H2OContext for `ss` using configuration 1.
hc = H2OContext.getOrCreate(ss, conf)
# Configuration 2: external cluster mode with automatic cluster start
# (1 external node, mapper xmx "2G", YARN queue "h2o", cloud name "h2o_gbm").
# NOTE(review): `conf` is rebound by the next statement before this value is
# passed to anything, so it appears unused. set_h2o_driver_path() is also
# called without an argument here, while the next statement passes a jar
# path - confirm the no-argument call is valid.
conf =H2OConf(ss).set_external_cluster_mode().set('HADOOP_USER_NAME','dp').set_h2o_driver_path().set_user_name('dp').use_auto_cluster_start().set_num_of_external_h2o_nodes(1).set_mapper_xmx("2G").set_yarn_queue("h2o").set_cloud_name("h2o_gbm")
# Configuration 3: same shape as configuration 2 but with an explicit driver
# jar, 2 external nodes, mapper xmx "6G" and YARN queue "default".
# NOTE(review): this value is also rebound by the next statement before use.
conf = H2OConf(ss).set_external_cluster_mode().set('HADOOP_USER_NAME', 'dp').set_h2o_driver_path(
"/home/dp/h2odriver/h2odriver-sw2.3.18-hdp2.6-extended.jar").set_user_name(
'dp').use_auto_cluster_start().set_num_of_external_h2o_nodes(2).set_mapper_xmx("6G").set_yarn_queue(
"default").set_cloud_name("h2o_gbm")
# Configuration 4: a fresh H2OConf with only the worker count set to 2.
# This is the configuration passed to the second getOrCreate call below.
conf = H2OConf(ss)
conf.set_num_h2o_workers(2)
# NOTE(review): second getOrCreate call; `hc` is rebound. Whether this returns
# the context requested above or honours configuration 4 cannot be told from
# this code - confirm against the Sparkling Water documentation.
hc = H2OContext.getOrCreate(ss, conf)
# Convert `df_corr` into an H2O frame registered under the name 'df_corr_h2o'.
df_corr_h2o = hc.as_h2o_frame(df_corr,framename='df_corr_h2o')
# Call cor() on the H2O frame and keep the result for the report below.
temp_corr_features_list = df_corr_h2o.cor()
# Spearman correlation over the 'features' column via Correlation.corr.
# NOTE(review): this reads `df`, not `df_corr`, and `corr_list` is not used
# anywhere in the visible code - confirm both are intentional.
corr_list = Correlation.corr(df, 'features' ,method="spearman")
# Print the cor() result, its type, and the seconds elapsed since `start_time`
# (set outside this excerpt). The labels roughly read "correlation coefficient
# computation" and "time spent"; NOTE(review): the second label looks like a
# typo in the original string - left untouched because it is runtime output.
print("相關系數計算:",temp_corr_features_list," type ",type(temp_corr_features_list),"話費時間:",time.time()-start_time)
# Feature columns: every key of final_table_schema except the target column
# (self.y_col) and any column whose schema value is 'timestamp'.
# The former "copy all keys, then remove self.y_col" pass was dropped because
# its result was immediately replaced by this filtered list.
columns_cols = [
    col_name
    for col_name, col_type in final_table_schema.items()
    if col_name != self.y_col and col_type != 'timestamp'
]