tensorflowのC++APIで変数を保存する

　前回はtensorflowのC++APIを用いて学習をできるようなコードを書いた。

　今回は学習した変数をファイルに書き出し及び読み出しできるように変更した。なかなか難しく、まだ理解できていないところが多い。

書き込み

　Saveのシグネチャは公式ドキュメントの通り

Save(const ::tensorflow::Scope & scope, ::tensorflow::Input filename, ::tensorflow::Input tensor_names, ::tensorflow::InputList data)

であり、引数に必要なものは

scope
filename
tensor_names
data

の4つである。したがっておおむね

auto save = Save(scope, "file_name", { "w1", "w2", "b1", "b2" }, { w1, w2, b1, b2 });

のような形でノードを生成し、Runで走らせれば良い。しかしfilename,tensor_namesはstringであるため(？)直接埋め込むことはできず、またInputListもそのままでは各変数がInputとOutputの2通りで解釈可能であるため初期化できないというエラーが出る。

　したがって、まず各stringに対してはデータを格納するPlaceholderと実際のデータであるTensorを準備する。

    // File name of the checkpoint to write. The string is not embedded in the graph;
    // it is held in a Tensor and fed through a DT_STRING Placeholder at run time.
    // The tensor is created with shape {1} and written through scalar<string>().
    auto file_name_op = Placeholder(scope, DT_STRING);
    Tensor file_name(DT_STRING, TensorShape{ 1 });
    file_name.scalar<string>()() = "model.ckpt";

    // Names under which the saved variables are stored (four entries: w1, w2, b1, b2),
    // likewise fed through a DT_STRING Placeholder.
    auto tensor_names_op = Placeholder(scope, DT_STRING);
    Tensor tensor_names(DT_STRING, TensorShape{ 4 });
    tensor_names.vec<string>()(0) = "w1";
    tensor_names.vec<string>()(1) = "w2";
    tensor_names.vec<string>()(2) = "b1";
    tensor_names.vec<string>()(3) = "b2";

　InputListにはInput(w1)などとするとコンパイルが通った。ここの理由はまだよくわかっていない。

    // Variables to save, in the order w1, w2, b1, b2. Each one is wrapped explicitly
    // in Input(...) so that the InputList can be constructed.
    auto tensors = InputList({ Input(w1), Input(w2), Input(b1), Input(b2) });

　この順番はtensor_namesで指定する名前の順番と一致していなければならない。

　このように引数を準備してから(少なくともPlaceholderを準備してから)ノードを生成し、feed_dictでデータを与えつつセッションを走らせる。

    // Op that saves the listed tensors; the file name and tensor names come from the placeholders.
    auto save = Save(scope, file_name_op, tensor_names_op, tensors);

    // Run the save op, feeding the file name and the tensor names into their placeholders.
    // The second argument (fetches) is left empty; save is passed as the third argument.
    TF_CHECK_OK(session.Run({ {file_name_op, file_name}, {tensor_names_op, tensor_names} }, {}, { save }, nullptr));

　Saveは値をfetchするノードではないため、第2引数(fetch_outputs)は空にし、実行だけを行うOperationを渡す第3引数(run_outputs)にsaveを与える。

読み出し

　Restoreのシグネチャも公式ドキュメントの通り

Restore(const ::tensorflow::Scope & scope, ::tensorflow::Input file_pattern, ::tensorflow::Input tensor_name, DataType dt)

であり、必要な引数は

scope
file_pattern
tensor_name
dt

である。気を付けるべき点としては(筆者の理解では)、Restoreは一つのTensorを読み出すものであり、読み出したい変数の数だけ読み出しノードを作る必要がある。また読み出しただけでは変数に割り当てられていないため、Assignを用いて対応する変数ノードに割り当てる必要がある。

　引数についてはfile_patternにはSaveでのfile_nameと同じものを与えればよく、Saveと同様に型がstringであるtensor_nameについてはPlaceholderを作りTensorにデータを入れて実行時に流し込むということを行う。

    // One Placeholder and one Tensor per variable, each holding the name
    // under which that variable was saved.
    auto w1_name_op = Placeholder(scope, DT_STRING);
    Tensor w1_name(DT_STRING, TensorShape{ 1 });
    w1_name.scalar<string>()() = "w1";
    auto w2_name_op = Placeholder(scope, DT_STRING);
    Tensor w2_name(DT_STRING, TensorShape{ 1 });
    w2_name.scalar<string>()() = "w2";
    auto b1_name_op = Placeholder(scope, DT_STRING);
    Tensor b1_name(DT_STRING, TensorShape{ 1 });
    b1_name.scalar<string>()() = "b1";
    auto b2_name_op = Placeholder(scope, DT_STRING);
    Tensor b2_name(DT_STRING, TensorShape{ 1 });
    b2_name.scalar<string>()() = "b2";

    // Restore ops: each Restore reads a single DT_FLOAT tensor by name, and the
    // paired Assign writes the restored value into the corresponding variable.
    auto restore_w1 = Restore(scope, file_name_op, w1_name_op, DT_FLOAT);
    auto restore_assign_w1 = Assign(scope, w1, restore_w1);
    auto restore_w2 = Restore(scope, file_name_op, w2_name_op, DT_FLOAT);
    auto restore_assign_w2 = Assign(scope, w2, restore_w2);
    auto restore_b1 = Restore(scope, file_name_op, b1_name_op, DT_FLOAT);
    auto restore_assign_b1 = Assign(scope, b1, restore_b1);
    auto restore_b2 = Restore(scope, file_name_op, b2_name_op, DT_FLOAT);
    auto restore_assign_b2 = Assign(scope, b2, restore_b2);

    // Run each restore-and-assign in its own call, feeding the file name and
    // that variable's name into the placeholders.
    TF_CHECK_OK(session.Run({ { file_name_op, file_name }, { w1_name_op, w1_name } }, { restore_assign_w1 }, nullptr));
    TF_CHECK_OK(session.Run({ { file_name_op, file_name }, { w2_name_op, w2_name } }, { restore_assign_w2 }, nullptr));
    TF_CHECK_OK(session.Run({ { file_name_op, file_name }, { b1_name_op, b1_name } }, { restore_assign_b1 }, nullptr));
    TF_CHECK_OK(session.Run({ { file_name_op, file_name }, { b2_name_op, b2_name } }, { restore_assign_b2 }, nullptr));

　こちらはAssignの出力をfetchする形で実行するため、第2引数(fetch_outputs)に与える。

　かなり冗長なコードになってしまい、可読性や保守性が低い。より大きいネットワークを構築しようとすると変数も多くなるため、効率的な書き方を模索する必要がある。

全体

　全体のコードは以下の様になった。

#define COMPILER_MSVC

#include <cstdio>
#include <iostream>
#include <random>
#include <vector>

#include "tensorflow/cc/client/client_session.h"
#include "tensorflow/cc/framework/gradients.h"
#include "tensorflow/cc/ops/standard_ops.h"
#include "tensorflow/core/framework/tensor.h"
#include "tensorflow/core/public/session.h"

using namespace tensorflow;
using namespace tensorflow::ops;

// Two-variable quadratic used to generate the teacher values for training:
//   f(x1, x2) = 2.0*x1*x2 - 2.4*x1^2 + 3.1*x2^2 + 2.7*x1 - 10.0*x2 + 5.0
// The terms are combined left to right in the order written above.
double targetFunction(double x1, double x2) {
    const double cross_term = 2.0 * x1 * x2;
    const double x1_quadratic = 2.4 * x1 * x1;
    const double x2_quadratic = 3.1 * x2 * x2;
    const double x1_linear = 2.7 * x1;
    const double x2_linear = 10.0 * x2;
    const double constant_term = 5.0;

    return cross_term - x1_quadratic + x2_quadratic + x1_linear - x2_linear + constant_term;
}

int main() {
    int unit_num;
    std::cout << "ユニット数 : ";
    std::cin >> unit_num;
    float learning_rate;
    std::cout << "学習率 : ";
    std::cin >> learning_rate;
    int batch_size;
    std::cout << "バッチサイズ : ";
    std::cin >> batch_size;
    int epoch_num;
    std::cout << "エポック数 : ";
    std::cin >> epoch_num;
    int restore_flag;
    std::cout << "変数の設定(0->初期化, 1->ファイルから読み込む) : ";
    std::cin >> restore_flag;

    Scope scope = Scope::NewRootScope();

    //入力,教師データのPlaceholder
    auto x = Placeholder(scope.WithOpName("x"), DT_FLOAT);
    auto y = Placeholder(scope.WithOpName("y"), DT_FLOAT);

    //中間層への重み、バイアス
    auto w1 = Variable(scope, { unit_num, 2 }, DT_FLOAT);
    auto assign_w1 = Assign(scope, w1, RandomNormal(scope, { unit_num, 2 }, DT_FLOAT));
    auto b1 = Variable(scope, { unit_num, 1 }, DT_FLOAT);
    auto assign_b1 = Assign(scope, b1, RandomNormal(scope, { unit_num, 1 }, DT_FLOAT));

    //出力層への重み、バイアス
    auto w2 = Variable(scope, { 1, unit_num  }, DT_FLOAT);
    auto assign_w2 = Assign(scope, w2, RandomNormal(scope, { 1, unit_num }, DT_FLOAT));
    auto b2 = Variable(scope, { 1, 1 }, DT_FLOAT);
    auto assign_b2 = Assign(scope, b2, RandomNormal(scope, { 1, 1 }, DT_FLOAT));

    //中間層
    auto hidden_layer = Relu(scope, Add(scope, MatMul(scope, w1, x), b1));

    //出力層
    auto output_layer = Add(scope.WithOpName("output_layer"), MatMul(scope, w2, hidden_layer), b2);

    //損失
    auto loss = ReduceMean(scope, Square(scope, Sub(scope, output_layer, y)), {0, 1});

    //勾配
    std::vector<Output> grad_outputs;
    TF_CHECK_OK(AddSymbolicGradients(scope, { loss }, { w1, w2, b1, b2 }, &grad_outputs));

    //勾配降下を各変数に適用
    auto apply_w1 = ApplyGradientDescent(scope, w1, learning_rate, { grad_outputs[0] });
    auto apply_w2 = ApplyGradientDescent(scope, w2, learning_rate, { grad_outputs[1] });
    auto apply_b1 = ApplyGradientDescent(scope, b1, learning_rate, { grad_outputs[2] });
    auto apply_b2 = ApplyGradientDescent(scope, b2, learning_rate, { grad_outputs[3] });
    
    //入力、教師データのPlaceholderに流す実際のデータ
    Tensor x_data(DT_FLOAT, TensorShape{ 2, batch_size });
    Tensor y_data(DT_FLOAT, TensorShape{ 1, batch_size });

    //x_dataとy_dataにbatch_size分のランダムデータを格納する関数
    auto setData = [&x_data, &y_data](int batch_size) {
        static std::random_device seed_gen;
        static std::default_random_engine engine(seed_gen());
        static std::uniform_real_distribution<> dist(-10.0, 10.0);

        for (int i = 0; i < batch_size; i++) {
            double x1 = dist(engine), x2 = dist(engine);
            x_data.matrix<float>()(0, i) = (float)x1;
            x_data.matrix<float>()(1, i) = (float)x2;

            y_data.matrix<float>()(0, i) = (float)targetFunction(x1, x2);
        }
    };

    //出力を受け取るための変数
    std::vector<Tensor> outputs;

    //保存する変数
    auto tensors = InputList({ Input(w1), Input(w2), Input(b1), Input(b2) });

    //保存するファイル名 stringのデータは直接埋め込めないらしい？
    //learning_rateとかは直接floatを書けるのに
    //なのでいちいちPlaceholderとTensorを作って流し込むということをやる
    auto file_name_op = Placeholder(scope, DT_STRING);
    Tensor file_name(DT_STRING, TensorShape{ 1 });
    file_name.scalar<string>()() = "model.ckpt";

    //保存する変数につける名前
    auto tensor_names_op = Placeholder(scope, DT_STRING);
    Tensor tensor_names(DT_STRING, TensorShape{ 4 });
    tensor_names.vec<string>()(0) = "w1";
    tensor_names.vec<string>()(1) = "w2";
    tensor_names.vec<string>()(2) = "b1";
    tensor_names.vec<string>()(3) = "b2";

    //Restoreするときは1変数ずつやっていく必要があるので1個ずつPlaceholderとTensorを作る
    auto w1_name_op = Placeholder(scope, DT_STRING);
    Tensor w1_name(DT_STRING, TensorShape{ 1 });
    w1_name.scalar<string>()() = "w1";
    auto w2_name_op = Placeholder(scope, DT_STRING);
    Tensor w2_name(DT_STRING, TensorShape{ 1 });
    w2_name.scalar<string>()() = "w2";
    auto b1_name_op = Placeholder(scope, DT_STRING);
    Tensor b1_name(DT_STRING, TensorShape{ 1 });
    b1_name.scalar<string>()() = "b1";
    auto b2_name_op = Placeholder(scope, DT_STRING);
    Tensor b2_name(DT_STRING, TensorShape{ 1 });
    b2_name.scalar<string>()() = "b2";

    //保存するOp
    auto save = Save(scope, file_name_op, tensor_names_op, tensors);

    //読みだすOp
    auto restore_w1 = Restore(scope, file_name_op, w1_name_op, DT_FLOAT);
    auto restore_assign_w1 = Assign(scope, w1, restore_w1);
    auto restore_w2 = Restore(scope, file_name_op, w2_name_op, DT_FLOAT);
    auto restore_assign_w2 = Assign(scope, w2, restore_w2);
    auto restore_b1 = Restore(scope, file_name_op, b1_name_op, DT_FLOAT);
    auto restore_assign_b1 = Assign(scope, b1, restore_b1);
    auto restore_b2 = Restore(scope, file_name_op, b2_name_op, DT_FLOAT);
    auto restore_assign_b2 = Assign(scope, b2, restore_b2);

    //セッションを作成
    ClientSession session(scope);

    if (restore_flag == 0) {
        //重みとバイアスを初期化
        TF_CHECK_OK(session.Run({ assign_w1, assign_w2, assign_b1, assign_b2 }, nullptr));
    } else {
        //ファイルから読み込む
        TF_CHECK_OK(session.Run({ { file_name_op, file_name }, { w1_name_op, w1_name } }, { restore_assign_w1 }, nullptr));
        TF_CHECK_OK(session.Run({ { file_name_op, file_name }, { w2_name_op, w2_name } }, { restore_assign_w2 }, nullptr));
        TF_CHECK_OK(session.Run({ { file_name_op, file_name }, { b1_name_op, b1_name } }, { restore_assign_b1 }, nullptr));
        TF_CHECK_OK(session.Run({ { file_name_op, file_name }, { b2_name_op, b2_name } }, { restore_assign_b2 }, nullptr));
    }

    for (int e = 0; e <= epoch_num; e++) {
        //検証
        setData(batch_size);
        TF_CHECK_OK(session.Run({ { x, x_data }, { y, y_data } }, { loss }, &outputs));

        printf("epoch = %5d, loss = %12.1f\n", e, outputs[0].scalar<float>()());
        if (e == epoch_num) {
            break;
        }

        //学習
        setData(batch_size);
        TF_CHECK_OK(session.Run({ { x, x_data },{ y, y_data } }, { apply_w1, apply_b1, apply_w2, apply_b2 }, nullptr));
    }

    TF_CHECK_OK(session.Run({ {file_name_op, file_name}, {tensor_names_op, tensor_names} }, {}, { save }, nullptr));
}

　入力を

項目	値
ユニット数	1000
学習率	0.00001
バッチサイズ	1000
エポック数	100
変数の設定	0

のようにして実行するとmodel.ckptが生成される。その後、変数の設定を1にして繰り返し実行すると、直前の実行の最終的な損失とほとんど同じ損失(データがランダムに生成されるのでぴったり同じとはならない)から始まり、着実に損失が減っていくことが確認できた。