Tensorflow cppAPI を使って簡単な算数の問題を解く

cppAPI を使って学習する。

ソースから Tensorflow をコンパイルする。では、Mac環境のcでTensorflowを呼出しましたが、実際に学習したり予測したりするためにはそれ用のメソッドが含まれたライブラリを使用する必要があります。C++でTensorflow APIを利用するためにソースコードから Tensorflow をコンパイルします。githubからTensorflowのリポジトリをダウンロードします。 bazel をインストールTensorflowのコンパイルには、googleが提供しているコンパイラであるBazelを使用します。Bazelはbrewでインストールできます。その他のインストール方...

上の記事「ソースから Tensorflow をコンパイルする」でインストールしたAPIを使ってみます。
Tensorflow の C++ API を使って、学習済みのモデルで予測するソースはちょくちょくWEBで見つかったのですが、
学習を行うソースについてはあまり見つからなかったので、一応メモします。
今回行うのは簡単な算数の問題を解くタスクです。
例えば、
1 □ 4 = 5
とあったら、□にはいるのは + と解くタスクです。
答えの種類は+-×÷です。

なので、入力は３つ
出力は４つです。
以下にも全部貼っていますが、一応ソースコードも上げてます。

pythonでモデル構造pbファイルだけ吐き出す。

pythonでモデル構造だけ吐き出します。

with tf.Session() as sess:
    # Build-only script: the graph is defined and serialized to 'mlp2.pb';
    # nothing is trained here. Every op that the C++ program refers to is
    # given an explicit name ("x", "y", "y_out", "y_argout", "loss",
    # "train", "init_all_vars_op"), so those names must not change.

    # x: the three numbers A, B, C of "A ? B = C".
    # y: one-hot label over the four operators.
    x = tf.placeholder(tf.float32, [None, 3], name="x")
    y = tf.placeholder(tf.float32, [None, 4], name="y")

    # 3 -> 16 -> 32 -> 4 fully connected network.
    w1 = tf.Variable(tf.truncated_normal([3, 16], stddev=0.1))
    b1 = tf.Variable(tf.constant(0.0, shape=[16]))

    w2 = tf.Variable(tf.truncated_normal([16, 32], stddev=0.1))
    b2 = tf.Variable(tf.constant(0.0, shape=[32]))

    w3 = tf.Variable(tf.truncated_normal([32, 4], stddev=0.1))
    b3 = tf.Variable(tf.constant(0.0, shape=[4]))

    hidden = tf.nn.relu(tf.nn.bias_add(tf.matmul(x, w1), b1))
    hidden = tf.nn.relu(tf.nn.bias_add(tf.matmul(hidden, w2), b2))

    # The output layer is kept linear: these values are passed as logits to
    # softmax_cross_entropy_with_logits, and a ReLU here would clamp every
    # negative logit to 0 and stop its gradient.
    y_out = tf.nn.bias_add(tf.matmul(hidden, w3), b3, name="y_out")
    y_argout = tf.argmax(input=y_out, axis=1, name="y_argout")
    logits = y_out
    #cost = tf.reduce_sum(tf.square(y-y_out), name="cost")
    # Summed (not averaged) over the batch; the caller divides by the batch size.
    loss = tf.reduce_sum(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=y), name="loss")
    # The returned ops are not used in Python; they are created only so that
    # they exist in the exported graph under the names "train" and
    # "init_all_vars_op".
    train_op = tf.train.AdamOptimizer().minimize(loss, name="train")

    # init = tf.initialize_variables(tf.all_variables(), name='init_all_vars_op')
    init_op = tf.variables_initializer(tf.global_variables(), name='init_all_vars_op')
    tf.train.write_graph(sess.graph_def, './', 'mlp2.pb', as_text=False)

シンプルなニューラルネットです。

cpp でモデルファイルを読み込んで学習する。

学習するファイルです。


#include <stdio.h>

#include <iostream>
#include <map>
#include <memory>
#include <random>
#include <string>
#include <vector>

#include <tensorflow/c/c_api.h>
#include "tensorflow/cc/client/client_session.h"
#include "tensorflow/cc/ops/standard_ops.h"
#include "tensorflow/core/framework/tensor.h"
#include "tensorflow/core/public/session.h"
#include "tensorflow/core/graph/default_device.h"

using namespace tensorflow;
using namespace tensorflow::ops;

void create_train_tensor(TTypes<float>::Matrix &x_tensor, TTypes<float>::Matrix &y_tensor);

int main(int argc, const char * argv[]) {
    // insert code here...
    printf("hello from tensorflow c library version %s\n", TF_Version());
    
    printf("start training. \n");
    
    std::string graph_definition = "mlp2.pb";
    Session* session;
    GraphDef graph_def;
    SessionOptions opts;
    std::vector<Tensor> outputs;
    TF_CHECK_OK(ReadBinaryProto(Env::Default(), graph_definition, &graph_def));
    
    TF_CHECK_OK(NewSession(opts, &session));
    
    TF_CHECK_OK(session->Create(graph_def));
    
    TF_CHECK_OK(session->Run({}, {}, {"init_all_vars_op"}, nullptr));
    
    // 学習データを作る
    int data_num = 100000;
    Tensor x(DT_FLOAT, TensorShape({data_num, 3}));
    Tensor y(DT_FLOAT, TensorShape({data_num, 4}));
    auto _XTensor = x.matrix<float>();
    auto _YTensor = y.matrix<float>();
    create_train_tensor(_XTensor, _YTensor);

    // テストデータを作る
    int test_num = 100;
    Tensor tx(DT_FLOAT, TensorShape({test_num, 3}));
    Tensor ty(DT_FLOAT, TensorShape({test_num, 4}));
    auto _TXT = tx.matrix<float>();
    auto _TYT = ty.matrix<float>();
    create_train_tensor(_TXT, _TYT);
    
    // どのテストデータの評価結果を出力するかに使うだけのランダムを設定
    std::random_device rnd;
    std::mt19937 mt(rnd());
    std::uniform_int_distribution<int> dice(1,test_num);
    
    std::map<int, std::string> ansmap;
    ansmap[0] = "+";
    ansmap[1] = "-";
    ansmap[2] = "÷";
    ansmap[3] = "×";
    for (int i = 0; i < 2000; ++i) {
        // 学習する時は、最後にnullptr
        // その前を{"train"}とする
        TF_CHECK_OK(session->Run({{"x", x}, {"y", y}}, {}, {"train"}, nullptr));
        
        if (i%100==0) {
            // 予測する時は,pythonで設定したnameでほしいものを指定する。
            // {"loss","y_out","y_argout"}と３つ指定しているので
            // 結果がそれぞれがoutputs[0],outputs[1],outputs[2]に入っている。
            TF_CHECK_OK(session->Run({{"x", tx}, {"y", ty}}, {"loss","y_out","y_argout"}, {}, &outputs));
            float loss = outputs[0].scalar<float>()(0)/test_num;
            std::cout << "epoch : " << i << ",  loss: " << loss << std::endl;
            for (int j=0; j<3; j++) {
                int dc = dice(mt);
                float x0 = tx.matrix<float>()(dc,0);
                float x1 = tx.matrix<float>()(dc,1);
                float x2 = tx.matrix<float>()(dc,2);
                std::cout << " Q." << j << ": " << x0 << " _ " << x1 << " = " << x2 << std::endl;
                std::cout << "   A: " << ansmap[outputs[2].flat<int64>()(dc)] << "  <--  " << std::endl;
            }
            outputs.clear();
            std::cout << std::endl;
        }
    }
    
    session->Close();
    delete session;
    return 0;
}

/**
 * Fills a feature/label pair with random "A ? B = C" samples.
 *
 * For each row, columns 0 and 1 of x_tensor keep the random values written by
 * setRandom() (A and B). One of the four operators is drawn uniformly,
 * column 2 (C) is overwritten with the result of applying it to A and B, and
 * the matching column of y_tensor is set to 1 (one-hot label):
 *   column 0: A + B, column 1: A - B, column 2: A / B, column 3: A * B.
 *
 * @param x_tensor feature matrix; columns 0..2 are read and written.
 * @param y_tensor label matrix; columns 0..3 are written. It is indexed with
 *                 the row count of x_tensor, so it needs at least as many rows.
 */
void create_train_tensor(TTypes<float>::Matrix &x_tensor, TTypes<float>::Matrix &y_tensor){
    const int train_num = static_cast<int>(x_tensor.dimension(0));
    // Start from random A, B (and a placeholder C) and an all-zero label.
    x_tensor.setRandom();
    y_tensor.setZero();
    // Random source for choosing the operator. uniform_int_distribution is a
    // closed interval, so [0, 3] gives each of the four operators the same
    // probability.
    std::random_device rnd;
    std::mt19937 mt(rnd());
    std::uniform_int_distribution<int> pick_operator(0, 3);
    for (int i=0; i<train_num; i++) {
        const float a = x_tensor(i,0);
        const float b = x_tensor(i,1);
        // Overwrite C according to the chosen operator and mark its label.
        switch (pick_operator(mt)) {
            case 0:
                x_tensor(i,2) = a + b;
                y_tensor(i,0) = 1;
                break;
            case 1:
                x_tensor(i,2) = a - b;
                y_tensor(i,1) = 1;
                break;
            case 2:
                // NOTE(review): b comes straight from setRandom(); if it can
                // be exactly 0 this yields inf/NaN — confirm whether that
                // needs guarding.
                x_tensor(i,2) = a / b;
                y_tensor(i,2) = 1;
                break;
            default:
                x_tensor(i,2) = a * b;
                y_tensor(i,3) = 1;
                break;
        }
    }
}

実行する

$
$ g++ main.cpp -I/usr/local/include/tensorflow/include -L/usr/local/lib -ltensorflow_cc -std=c++11 -ltensorflow_framework
...
$ ./a.out
...
...
...
epoch : 1700,  loss: 0.0641037
 Q.0: 0.897669 _ 0.551421 = 0.346248
   A: -  <--
 Q.1: 0.605148 _ 0.282638 = 2.14107
   A: ÷  <--
 Q.2: 0.305765 _ 0.38952 = -0.0837549
   A: -  <--

epoch : 1800,  loss: 0.0602399
 Q.0: 0.298945 _ 0.803915 = 0.240326
   A: ×  <--
 Q.1: 0.107497 _ 0.551765 = 0.194824
   A: ÷  <--
 Q.2: 0.672843 _ 0.774557 = 0.521156
   A: ×  <--

epoch : 1900,  loss: 0.0569926
 Q.0: 0.261485 _ 0.796014 = 0.208146
   A: ×  <--
 Q.1: 0.664705 _ 0.641435 = 1.03628
   A: ÷  <--
 Q.2: 0.16627 _ 0.875046 = 0.145494
   A: ×  <--
$

と、lossが減って、だんだん正解しているのがわかると思います。
（accuracyはちょっと面倒だったので出してないのですが）

C++でモデルを構築するのはまだ調べ途中なので、わかったらまた恐らくメモすると思います。