function[final_dag,history_dags,history_scores]=rmcv_gs(init_dag,train_data,test_data,nodes_sizes,class_node_number,k) % Risk Minimization by Cross Validation Hill-Climbing Search: % init_dag - the initial DAG structure % train_data - the training data % test data - NOT USED FOR LEARNING, for evaluation purpose only. % nodes_sizes - the number of possible values each node can get % class_node_number - indicates the number of the class node % k - number of cross validation folds, CV-k. %make sets for CV-k data=cell2num(train_data'); datacell=train_data; validation_size=size(train_data,2)/k; train_size=(k-1)*validation_size; clear train_data; for c1=1:k train_data{c1}=num2cell(data(1:train_size,:)'); validation_data{c1}=num2cell(data(train_size+1:end,:)'); data=circshift(data,[validation_size 0]); end %calculate initial DAG RMCV score (average over RMHO) tic; init_score=0; for c1=1:k init_score=init_score+rmho_score(init_dag,nodes_sizes,train_data{c1},validation_data{c1},class_node_number); end init_score=init_score/k; disp(sprintf('Initial DAG scoring time is %f seconds',toc)); disp(sprintf('Initial DAG score is %f',init_score)); %greedy search for a network with a higher RMCV score %save initial structure and score current_dag=init_dag; current_score=init_score; %convergence flag converged=false; %history saver, saves results for RMCV (current_score), train/test set classification accuracy history_counter=1; history_scores(history_counter,:)=[current_score 1-rmho_score(current_dag,nodes_sizes,datacell,datacell,class_node_number) 1-rmho_score(current_dag,nodes_sizes,datacell,test_data,class_node_number)]; history_dags{history_counter}=current_dag; %search loop while (converged==false) %generate neighborhood (set of dags) of current dag dags=mk_nbrs_of_dag(current_dag); %score each adjacent dag num_dags=size(dags,2); disp(sprintf('Current DAG has %d neighbours.',num_dags)); scores=zeros(1,num_dags); %RMCV-k scoring based on RMHO for c1=1:num_dags temp=0; for c2=1:k temp=temp+rmho_score(dags{c1},nodes_sizes,train_data{c2},validation_data{c2},class_node_number); end scores(c1)=temp/k; end %continue search through the dag with the highest score if available if min(scores)