SHOGUN
v1.1.0
|
00001 /* 00002 * This program is free software; you can redistribute it and/or modify 00003 * it under the terms of the GNU General Public License as published by 00004 * the Free Software Foundation; either version 3 of the License, or 00005 * (at your option) any later version. 00006 * 00007 * Written (W) 1999-2009 Soeren Sonnenburg 00008 * Written (W) 1999-2008 Gunnar Raetsch 00009 * Subset support written (W) 2011 Heiko Strathmann 00010 * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society 00011 */ 00012 00013 #include <shogun/features/Labels.h> 00014 #include <shogun/lib/common.h> 00015 #include <shogun/io/File.h> 00016 #include <shogun/io/SGIO.h> 00017 #include <shogun/mathematics/Math.h> 00018 #include <shogun/base/Parameter.h> 00019 #include <shogun/lib/Set.h> 00020 00021 using namespace shogun; 00022 00023 CLabels::CLabels() 00024 : CSGObject() 00025 { 00026 init(); 00027 } 00028 00029 CLabels::CLabels(int32_t num_lab) 00030 : CSGObject() 00031 { 00032 init(); 00033 labels=SGVector<float64_t>(num_lab); 00034 } 00035 00036 CLabels::CLabels(SGVector<float64_t> src) 00037 : CSGObject() 00038 { 00039 init(); 00040 00041 set_labels(src); 00042 m_num_classes=get_num_classes(); 00043 } 00044 00045 void CLabels::set_to_one() 00046 { 00047 ASSERT(labels.vector); 00048 index_t subset_size=get_num_labels(); 00049 for (int32_t i=0; i<subset_size; i++) 00050 labels.vector[subset_idx_conversion(i)]=+1; 00051 } 00052 00053 CLabels::CLabels(CFile* loader) 00054 : CSGObject() 00055 { 00056 init(); 00057 load(loader); 00058 } 00059 00060 CLabels::~CLabels() 00061 { 00062 labels.destroy_vector(); 00063 delete m_subset; 00064 m_subset=NULL; 00065 00066 m_num_classes=0; 00067 } 00068 00069 void CLabels::init() 00070 { 00071 m_parameters->add(&labels, "labels", "The labels."); 00072 m_parameters->add((CSGObject**)&m_subset, "subset", "Subset object"); 00073 00074 labels=SGVector<float64_t>(); 00075 m_num_classes=0; 00076 m_subset=NULL; 00077 } 00078 00079 void CLabels::set_labels(SGVector<float64_t> v) 00080 { 00081 if (m_subset) 00082 SG_ERROR("A subset is set, cannot set labels\n"); 00083 00084 labels.free_vector(); 00085 labels=v; 00086 labels.do_free=false; 00087 } 00088 00089 bool CLabels::is_two_class_labeling() 00090 { 00091 ASSERT(labels.vector); 00092 bool found_plus_one=false; 00093 bool found_minus_one=false; 00094 00095 int32_t subset_size=get_num_labels(); 00096 for (int32_t i=0; i<subset_size; i++) 00097 { 00098 int32_t real_i=subset_idx_conversion(i); 00099 if (labels.vector[real_i]==+1.0) 00100 found_plus_one=true; 00101 else if (labels.vector[real_i]==-1.0) 00102 found_minus_one=true; 00103 else 00104 { 00105 SG_ERROR("Not a two class labeling label[%d]=%f (only +1/-1 " 00106 "allowed)\n", i, labels.vector[real_i]); 00107 } 00108 } 00109 00110 if (!found_plus_one) 00111 SG_ERROR("Not a two class labeling - no positively labeled examples found\n"); 00112 if (!found_minus_one) 00113 SG_ERROR("Not a two class labeling - no negatively labeled examples found\n"); 00114 00115 return true; 00116 } 00117 00118 int32_t CLabels::get_num_classes() 00119 { 00120 CSet<float64_t>* classes=new CSet<float64_t>(); 00121 for (int32_t i=0; i<get_num_labels(); i++) 00122 classes->add(get_label(i)); 00123 00124 int32_t result=classes->get_num_elements(); 00125 SG_UNREF(classes); 00126 return result; 00127 } 00128 00129 SGVector<float64_t> CLabels::get_classes() 00130 { 00131 CSet<float64_t>* classes=new CSet<float64_t>(); 00132 00133 for (int32_t i=0; i<get_num_labels(); i++) 00134 classes->add(get_label(i)); 00135 00136 SGVector<float64_t> result(classes->get_num_elements()); 00137 memcpy(result.vector, classes->get_array(), 00138 sizeof(float64_t)*classes->get_num_elements()); 00139 00140 SG_UNREF(classes); 00141 return result; 00142 } 00143 00144 SGVector<float64_t> CLabels::get_labels() 00145 { 00146 if (m_subset) 00147 SG_ERROR("get_labels() is not possible on subset"); 00148 00149 return labels; 00150 } 00151 00152 SGVector<int32_t> CLabels::get_int_labels() 00153 { 00154 SGVector<int32_t> intlab(get_num_labels(), true); 00155 00156 for (int32_t i=0; i<get_num_labels(); i++) 00157 intlab.vector[i]= get_int_label(i); 00158 00159 return intlab; 00160 } 00161 00162 void CLabels::set_int_labels(SGVector<int32_t> lab) 00163 { 00164 if (m_subset) 00165 SG_ERROR("set_int_labels() is not possible on subset"); 00166 00167 labels.free_vector(); 00168 labels = SGVector<float64_t>(lab.vlen); 00169 00170 for (int32_t i=0; i<lab.vlen; i++) 00171 set_int_label(i, labels.vector[i]); 00172 } 00173 00174 void CLabels::load(CFile* loader) 00175 { 00176 remove_subset(); 00177 00178 SG_SET_LOCALE_C; 00179 labels.free_vector(); 00180 00181 ASSERT(loader); 00182 loader->get_vector(labels.vector, labels.vlen); 00183 m_num_classes=get_num_classes(); 00184 SG_RESET_LOCALE; 00185 } 00186 00187 void CLabels::save(CFile* writer) 00188 { 00189 if (m_subset) 00190 SG_ERROR("save() is not possible on subset"); 00191 00192 SG_SET_LOCALE_C; 00193 ASSERT(writer); 00194 ASSERT(labels.vector && labels.vlen>0); 00195 writer->set_vector(labels.vector, labels.vlen); 00196 SG_RESET_LOCALE; 00197 } 00198 00199 bool CLabels::set_label(int32_t idx, float64_t label) 00200 { 00201 int32_t real_num=subset_idx_conversion(idx); 00202 if (labels.vector && real_num<get_num_labels()) 00203 { 00204 labels.vector[real_num]=label; 00205 return true; 00206 } 00207 else 00208 return false; 00209 } 00210 00211 bool CLabels::set_int_label(int32_t idx, int32_t label) 00212 { 00213 int32_t real_num=subset_idx_conversion(idx); 00214 if (labels.vector && real_num<get_num_labels()) 00215 { 00216 labels.vector[real_num]= (float64_t) label; 00217 return true; 00218 } 00219 else 00220 return false; 00221 } 00222 00223 float64_t CLabels::get_label(int32_t idx) 00224 { 00225 int32_t real_num=subset_idx_conversion(idx); 00226 ASSERT(labels.vector && idx<get_num_labels()); 00227 return labels.vector[real_num]; 00228 } 00229 00230 int32_t CLabels::get_int_label(int32_t idx) 00231 { 00232 int32_t real_num=subset_idx_conversion(idx); 00233 ASSERT(labels.vector && idx<get_num_labels()); 00234 if (labels.vector[real_num] != float64_t((int32_t(labels.vector[real_num])))) 00235 SG_ERROR("label[%d]=%g is not an integer\n", labels.vector[real_num]); 00236 00237 return int32_t(labels.vector[real_num]); 00238 } 00239 00240 int32_t CLabels::get_num_labels() 00241 { 00242 return m_subset ? m_subset->get_size() : labels.vlen; 00243 } 00244 00245 void CLabels::set_subset(CSubset* subset) 00246 { 00247 SG_UNREF(m_subset); 00248 m_subset=subset; 00249 SG_REF(subset); 00250 } 00251 00252 void CLabels::remove_subset() 00253 { 00254 set_subset(NULL); 00255 } 00256 00257 index_t CLabels::subset_idx_conversion(index_t idx) const 00258 { 00259 return m_subset ? m_subset->subset_idx_conversion(idx) : idx; 00260 }