Caffe is currently one of the best and most usable open-source deep learning libraries. It is implemented in C++ and CUDA, and its strengths include speed and a convenient model-definition format. After a few days of working with it, though, I found one inconvenience: there is no direct interface for calling Caffe from my own program to classify images. Caffe's data layers can read input from a database (leveldb, lmdb, and hdf5 are supported), from image files, or from memory. To call Caffe inside a program, we naturally want to feed it from memory. Following http://caffe.berkeleyvision.org/tutorial/layers.html#data-layers and the MemoryDataLayer source code, we first define a data layer in the model definition file:
layers {
  name: "mydata"
  type: MEMORY_DATA
  top: "data"
  top: "label"
  transform_param {
    scale: 0.00390625
  }
  memory_data_param {
    batch_size: 10
    channels: 1
    height: 24
    width: 24
  }
}
All four parameters of memory_data_param must be set here; their definitions can be found in the caffe.proto file in the source tree. (The scale of 0.00390625 is 1/256, which maps 8-bit pixel values into [0, 1).) Now we can design a Classifier class as a wrapper:
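For reference, those four fields correspond to the MemoryDataParameter message in caffe.proto; the excerpt below reflects how that message is declared in Caffe releases of this vintage (check your own checkout to be sure):

// Excerpt from caffe.proto: the four fields the MEMORY_DATA layer
// requires; all of them must be set in the model definition.
message MemoryDataParameter {
  optional uint32 batch_size = 1;
  optional uint32 channels = 2;
  optional uint32 height = 3;
  optional uint32 width = 4;
}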
#ifndef CAFFE_CLASSIFIER_H
#define CAFFE_CLASSIFIER_H

#include <string>
#include <vector>
#include "caffe/net.hpp"
#include "caffe/data_layers.hpp"
#include <opencv2/core.hpp>

using cv::Mat;

namespace caffe {

template <typename Dtype>
class Classifier {
 public:
  explicit Classifier(const string& param_file, const string& weights_file);
  Dtype test(vector<Mat> &images, vector<int> &labels, int iter_num);
  virtual ~Classifier() {}
  inline shared_ptr<Net<Dtype> > net() { return net_; }
  void predict(vector<Mat> &images, vector<int> *labels);
  void predict(vector<Dtype> &data, vector<int> *labels, int num);
  void extract_feature(vector<Mat> &images, vector<vector<Dtype> > *out);

 protected:
  shared_ptr<Net<Dtype> > net_;
  MemoryDataLayer<Dtype> *m_layer_;
  int batch_size_;
  int channels_;
  int height_;
  int width_;

  DISABLE_COPY_AND_ASSIGN(Classifier);
};

}  // namespace caffe

#endif  // CAFFE_CLASSIFIER_H
In the constructor we build a Net object from the model definition file (.prototxt) and the trained weights file (.caffemodel), and point m_layer_ at the Net's memory data layer so that we can later feed data in through MemoryDataLayer's AddMatVector and Reset functions.
#include <cstdio>
#include <algorithm>
#include <string>
#include <vector>

#include "caffe/net.hpp"
#include "caffe/proto/caffe.pb.h"
#include "caffe/util/io.hpp"
#include "caffe/util/math_functions.hpp"
#include "caffe/util/upgrade_proto.hpp"

#include "caffe_classifier.h"

namespace caffe {

template <typename Dtype>
Classifier<Dtype>::Classifier(const string& param_file,
                              const string& weights_file) : net_() {
  net_.reset(new Net<Dtype>(param_file, TEST));
  net_->CopyTrainedLayersFrom(weights_file);
  //m_layer_ = (MemoryDataLayer<Dtype>*)net_->layer_by_name("mnist").get();
  // The memory data layer is the first layer of the net.
  m_layer_ = (MemoryDataLayer<Dtype>*)net_->layers()[0].get();
  batch_size_ = m_layer_->batch_size();
  channels_ = m_layer_->channels();
  height_ = m_layer_->height();
  width_ = m_layer_->width();
}

template <typename Dtype>
Dtype Classifier<Dtype>::test(vector<Mat> &images, vector<int> &labels,
                              int iter_num) {
  m_layer_->AddMatVector(images, labels);
  int iterations = iter_num;
  vector<Blob<Dtype>* > bottom_vec;
  vector<int> test_score_output_id;
  vector<Dtype> test_score;
  Dtype loss = 0;
  for (int i = 0; i < iterations; ++i) {
    Dtype iter_loss;
    const vector<Blob<Dtype>*>& result =
        net_->Forward(bottom_vec, &iter_loss);
    loss += iter_loss;
    int idx = 0;
    for (int j = 0; j < result.size(); ++j) {
      const Dtype* result_vec = result[j]->cpu_data();
      for (int k = 0; k < result[j]->count(); ++k, ++idx) {
        const Dtype score = result_vec[k];
        if (i == 0) {
          test_score.push_back(score);
          test_score_output_id.push_back(j);
        } else {
          test_score[idx] += score;
        }
        const std::string& output_name = net_->blob_names()[
            net_->output_blob_indices()[j]];
        LOG(INFO) << "Batch " << i << ", " << output_name << " = " << score;
      }
    }
  }
  loss /= iterations;
  LOG(INFO) << "Loss: " << loss;
  return loss;
}

template <typename Dtype>
void Classifier<Dtype>::predict(vector<Mat> &images, vector<int> *labels) {
  int original_length = images.size();
  if (original_length == 0)
    return;
  // Pad the input so its length is a multiple of the batch size.
  int valid_length = original_length / batch_size_ * batch_size_;
  if (original_length != valid_length) {
    valid_length += batch_size_;
    for (int i = original_length; i < valid_length; i++) {
      images.push_back(images[0].clone());
    }
  }
  vector<int> valid_labels, predicted_labels;
  valid_labels.resize(valid_length, 0);
  m_layer_->AddMatVector(images, valid_labels);
  vector<Blob<Dtype>* > bottom_vec;
  for (int i = 0; i < valid_length / batch_size_; i++) {
    const vector<Blob<Dtype>*>& result = net_->Forward(bottom_vec);
    const Dtype* result_vec = result[1]->cpu_data();
    for (int j = 0; j < result[1]->count(); j++) {
      predicted_labels.push_back(result_vec[j]);
    }
  }
  // Drop the padding samples and their predictions.
  if (original_length != valid_length) {
    images.erase(images.begin() + original_length, images.end());
  }
  labels->resize(original_length, 0);
  std::copy(predicted_labels.begin(),
            predicted_labels.begin() + original_length, labels->begin());
}

template <typename Dtype>
void Classifier<Dtype>::predict(vector<Dtype> &data, vector<int> *labels,
                                int num) {
  int size = channels_ * height_ * width_;
  CHECK_EQ(data.size(), num * size);
  int original_length = num;
  if (original_length == 0)
    return;
  // Pad the input so its length is a multiple of the batch size.
  int valid_length = original_length / batch_size_ * batch_size_;
  if (original_length != valid_length) {
    valid_length += batch_size_;
    for (int i = original_length; i < valid_length; i++) {
      for (int j = 0; j < size; j++)
        data.push_back(0);
    }
  }
  vector<int> predicted_labels;
  Dtype* label_ = new Dtype[valid_length];
  memset(label_, 0, sizeof(Dtype) * valid_length);  // zero the dummy labels
  m_layer_->Reset(data.data(), label_, valid_length);
  vector<Blob<Dtype>* > bottom_vec;
  for (int i = 0; i < valid_length / batch_size_; i++) {
    const vector<Blob<Dtype>*>& result = net_->Forward(bottom_vec);
    const Dtype* result_vec = result[1]->cpu_data();
    for (int j = 0; j < result[1]->count(); j++) {
      predicted_labels.push_back(result_vec[j]);
    }
  }
  if (original_length != valid_length) {
    data.erase(data.begin() + original_length * size, data.end());
  }
  delete[] label_;
  labels->resize(original_length, 0);
  std::copy(predicted_labels.begin(),
            predicted_labels.begin() + original_length, labels->begin());
}

template <typename Dtype>
void Classifier<Dtype>::extract_feature(vector<Mat> &images,
                                        vector<vector<Dtype> > *out) {
  int original_length = images.size();
  if (original_length == 0)
    return;
  // Pad as in predict().
  int valid_length = original_length / batch_size_ * batch_size_;
  if (original_length != valid_length) {
    valid_length += batch_size_;
    for (int i = original_length; i < valid_length; i++) {
      images.push_back(images[0].clone());
    }
  }
  vector<int> valid_labels;
  valid_labels.resize(valid_length, 0);
  m_layer_->AddMatVector(images, valid_labels);
  vector<Blob<Dtype>* > bottom_vec;
  out->clear();
  for (int i = 0; i < valid_length / batch_size_; i++) {
    const vector<Blob<Dtype>*>& result = net_->Forward(bottom_vec);
    const Dtype* result_vec = result[0]->cpu_data();
    const int dim = result[0]->count(1);  // feature dimension per sample
    for (int j = 0; j < result[0]->num(); j++) {
      const Dtype* ptr = result_vec + j * dim;
      vector<Dtype> one_;
      for (int k = 0; k < dim; ++k)
        one_.push_back(ptr[k]);
      out->push_back(one_);
    }
  }
  if (original_length != valid_length) {
    images.erase(images.begin() + original_length, images.end());
    out->erase(out->begin() + original_length, out->end());
  }
}

INSTANTIATE_CLASS(Classifier);

}  // namespace caffe
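To show how the class is meant to be driven, here is a minimal usage sketch; the file names, the single test image, and OpenCV 3-style headers are my own assumptions matching the 1x24x24 layer definition above, not part of the original post:

// Minimal driver sketch: classify one grayscale image with Classifier.
#include <iostream>
#include <vector>
#include <opencv2/imgcodecs.hpp>
#include <opencv2/imgproc.hpp>
#include "caffe_classifier.h"

int main() {
  // Placeholder paths: your deploy definition and trained weights.
  caffe::Classifier<float> classifier("model.prototxt", "model.caffemodel");

  // The memory data layer defined earlier expects 1x24x24 inputs.
  cv::Mat img = cv::imread("digit.png", cv::IMREAD_GRAYSCALE);
  cv::resize(img, img, cv::Size(24, 24));

  std::vector<cv::Mat> images(1, img);
  std::vector<int> labels;
  classifier.predict(images, &labels);  // pads to a full batch internally
  std::cout << "predicted label: " << labels[0] << std::endl;
  return 0;
}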
Because the number of samples added must be an integer multiple of batch_size, we pad the input when adding data; MemoryDataLayer enforces this itself, as this check from AddMatVector shows:
// from MemoryDataLayer<Dtype>::AddMatVector
CHECK_EQ(num % batch_size_, 0) <<
    "The added data must be a multiple of the batch size.";
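The round-up-to-a-full-batch computation that predict() performs inline can be written as a small helper; padded_length is a hypothetical name for illustration, not part of the original class:

// Hypothetical helper: round n up to the nearest multiple of batch_size,
// the same computation predict() does before padding with cloned samples.
static int padded_length(int n, int batch_size) {
  if (n % batch_size == 0) return n;
  return (n / batch_size + 1) * batch_size;
}
// e.g. padded_length(23, 10) == 30: seven dummy samples are appended,
// and their predictions are dropped after the forward passes.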
At the end of the model file, we replace the loss layer used during training with an argmax layer:
layers {
  name: "predicted"
  type: ARGMAX
  bottom: "prob"
  top: "predicted"
}
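For completeness: the "prob" blob consumed by the argmax layer would be produced by a softmax layer standing in for the training-time softmax loss; a sketch in the same old-style prototxt, assuming the last inner-product layer is named "ip2" as in the LeNet example:

layers {
  name: "prob"
  type: SOFTMAX
  bottom: "ip2"
  top: "prob"
}

With this tail, the net's unconsumed output blobs are the "label" top of the memory data layer and "predicted", which is why predict() reads its results from result[1].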