SHOGUN
v1.1.0
|
00001 /* 00002 * This program is free software; you can redistribute it and/or modify 00003 * it under the terms of the GNU General Public License as published by 00004 * the Free Software Foundation; either version 3 of the License, or 00005 * (at your option) any later version. 00006 * 00007 * Written (W) 1999-2009 Soeren Sonnenburg 00008 * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society 00009 */ 00010 00011 #include <shogun/lib/common.h> 00012 #include <shogun/io/SGIO.h> 00013 #include <shogun/features/StringFeatures.h> 00014 #include <shogun/features/Labels.h> 00015 #include <shogun/distributions/LinearHMM.h> 00016 #include <shogun/classifier/PluginEstimate.h> 00017 00018 using namespace shogun; 00019 00020 CPluginEstimate::CPluginEstimate(float64_t pos_pseudo, float64_t neg_pseudo) 00021 : CMachine(), m_pos_pseudo(1e-10), m_neg_pseudo(1e-10), 00022 pos_model(NULL), neg_model(NULL), features(NULL) 00023 { 00024 m_parameters->add(&m_pos_pseudo, 00025 "pos_pseudo","pseudo count for positive class"); 00026 m_parameters->add(&m_neg_pseudo, 00027 "neg_pseudo", "pseudo count for negative class"); 00028 00029 m_parameters->add((CSGObject**) &pos_model, 00030 "pos_model", "LinearHMM modelling positive class."); 00031 m_parameters->add((CSGObject**) &neg_model, 00032 "neg_model", "LinearHMM modelling negative class."); 00033 00034 m_parameters->add((CSGObject**) &features, 00035 "features", "String Features."); 00036 } 00037 00038 CPluginEstimate::~CPluginEstimate() 00039 { 00040 SG_UNREF(pos_model); 00041 SG_UNREF(neg_model); 00042 00043 SG_UNREF(features); 00044 } 00045 00046 bool CPluginEstimate::train_machine(CFeatures* data) 00047 { 00048 ASSERT(labels); 00049 if (data) 00050 { 00051 if (data->get_feature_class() != C_STRING || 00052 data->get_feature_type() != F_WORD) 00053 { 00054 SG_ERROR("Features not of class string type word\n"); 00055 } 00056 00057 set_features((CStringFeatures<uint16_t>*) data); 00058 } 00059 ASSERT(features); 00060 00061 SG_UNREF(pos_model); 00062 SG_UNREF(neg_model); 00063 00064 pos_model=new CLinearHMM(features); 00065 neg_model=new CLinearHMM(features); 00066 00067 SG_REF(pos_model); 00068 SG_REF(neg_model); 00069 00070 int32_t* pos_indizes=SG_MALLOC(int32_t, ((CStringFeatures<uint16_t>*) features)->get_num_vectors()); 00071 int32_t* neg_indizes=SG_MALLOC(int32_t, ((CStringFeatures<uint16_t>*) features)->get_num_vectors()); 00072 00073 ASSERT(labels->get_num_labels()==features->get_num_vectors()); 00074 00075 int32_t pos_idx=0; 00076 int32_t neg_idx=0; 00077 00078 for (int32_t i=0; i<labels->get_num_labels(); i++) 00079 { 00080 if (labels->get_label(i) > 0) 00081 pos_indizes[pos_idx++]=i; 00082 else 00083 neg_indizes[neg_idx++]=i; 00084 } 00085 00086 SG_INFO( "training using pseudos %f and %f\n", m_pos_pseudo, m_neg_pseudo); 00087 pos_model->train(pos_indizes, pos_idx, m_pos_pseudo); 00088 neg_model->train(neg_indizes, neg_idx, m_neg_pseudo); 00089 00090 SG_FREE(pos_indizes); 00091 SG_FREE(neg_indizes); 00092 00093 return true; 00094 } 00095 00096 CLabels* CPluginEstimate::apply() 00097 { 00098 ASSERT(features); 00099 CLabels* result=new CLabels(features->get_num_vectors()); 00100 ASSERT(result->get_num_labels()==features->get_num_vectors()); 00101 00102 for (int32_t vec=0; vec<features->get_num_vectors(); vec++) 00103 result->set_label(vec, apply(vec)); 00104 00105 return result; 00106 } 00107 00108 CLabels* CPluginEstimate::apply(CFeatures* data) 00109 { 00110 if (!data) 00111 SG_ERROR("No features specified\n"); 00112 00113 if (data->get_feature_class() != C_STRING || 00114 data->get_feature_type() != F_WORD) 00115 { 00116 SG_ERROR("Features not of class string type word\n"); 00117 } 00118 00119 set_features((CStringFeatures<uint16_t>*) data); 00120 return apply(); 00121 } 00122 00123 float64_t CPluginEstimate::apply(int32_t vec_idx) 00124 { 00125 ASSERT(features); 00126 00127 int32_t len; 00128 bool free_vec; 00129 uint16_t* vector=features->get_feature_vector(vec_idx, len, free_vec); 00130 00131 if ((!pos_model) || (!neg_model)) 00132 SG_ERROR( "model(s) not assigned\n"); 00133 00134 float64_t result=pos_model->get_log_likelihood_example(vector, len) - neg_model->get_log_likelihood_example(vector, len); 00135 features->free_feature_vector(vector, vec_idx, free_vec); 00136 return result; 00137 }