一,问题背景

多进程之间需要传输大量数据的时候,比如多进程 RPC 框架的进程之间通信,常用共享内存队列。

但是共享内存队列难免会有 入队+出队 2次 memcpy 。

而且要实现变长的共享内存队列,如果还要支持 多生产者进程+多消费者进程,就要处理并发安全(进程间同步)方面的问题,比较麻烦。

process_vm_readv() , process_vm_writev() 是 Linux 3.2 新增的 syscall,用于在多个进程的地址空间之间,高效传输大块数据。

https://www.man7.org/linux/man-pages/man2/process_vm_readv.2.html

https://github.com/open-mpi/ompi/blob/master/opal/mca/btl/sm/btl_sm_get.c#L96

在此, 我提个设想,可以用 process_vm_readv 实现一个多进程内存队列,相比之下,优势是:

  1. 在处理 多线程/多进程 并发时,更简单
  2. 省掉一次 memcpy。

下面 demo 代码演示了这个思路。 为了方便,直接用 POSIX mq 来传输 iovec 数组,也没有考虑多路复用。 真实项目可能需要 eventfd 之类的机制来做 notify。

实测了下,3个进程加起来有 5.5 GB/秒 的速度。

二,代码

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190

#include <assert.h>
#include <errno.h>
#include <fcntl.h>
#include <mqueue.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/uio.h>
#include <sys/wait.h>
#include <unistd.h>

#include <iostream>
#include <sstream>
#include <vector>
using namespace std;

/// Descriptor for a set of memory regions that live in one process and are
/// meant to be pulled into another process with process_vm_readv().
///
/// Wire format produced by ToString() and consumed by FromString(): the raw
/// bytes of a pid_t immediately followed by zero or more raw `iovec` structs,
/// all in host representation. The descriptor carries addresses only, never
/// the payload itself.
struct MemMessage {
    pid_t pid = 0;               ///< Process that owns the regions (set by FromString()).
    std::vector<iovec> mem_vec;  ///< Regions, expressed in the owner's address space.

public:
    MemMessage() {}

    /// Human-readable dump of the pid and every region, for log output.
    std::string DebugString() const {
        std::ostringstream oss;
        oss << "pid=" << pid;
        for (const auto& mem : mem_vec) {
            oss << " iov={" << mem.iov_base << "," << mem.iov_len << "}";
        }
        return oss.str();
    }

    /// Serializes the descriptor into the wire format.
    ///
    /// The pid written is always the calling process's getpid(), not the
    /// `pid` member: the sender is the owner of the memory it describes.
    std::string ToString() const {
        const pid_t from_pid = getpid();
        std::string buff;
        buff.reserve(sizeof(from_pid) + mem_vec.size() * sizeof(iovec));
        buff.append(reinterpret_cast<const char*>(&from_pid), sizeof(from_pid));
        for (const auto& mem : mem_vec) {
            buff.append(reinterpret_cast<const char*>(&mem), sizeof(mem));
        }
        return buff;
    }

    /// Parses a buffer in the wire format and replaces this object's contents.
    ///
    /// @param buff      Start of the serialized descriptor.
    /// @param buff_len  Number of valid bytes at `buff`.
    /// @return true on success. false if `buff` is null, shorter than a
    ///         pid_t, or if the bytes after the pid are not a whole number
    ///         of `iovec` structs; the object is left unchanged in that case.
    bool FromString(const char* buff, size_t buff_len) {
        if (buff == nullptr || buff_len < sizeof(pid)) {
            return false;
        }
        const size_t payload_len = buff_len - sizeof(pid);
        if (payload_len % sizeof(iovec) != 0) {
            // A partial trailing iovec means the message was truncated.
            return false;
        }

        // memcpy instead of a pointer cast: `buff` carries no alignment guarantee.
        pid_t parsed_pid = 0;
        memcpy(&parsed_pid, buff, sizeof(parsed_pid));

        std::vector<iovec> parsed(payload_len / sizeof(iovec));
        if (!parsed.empty()) {
            memcpy(parsed.data(), buff + sizeof(pid), payload_len);
        }

        // Commit only after the whole buffer parsed, and replace (not append
        // to) any regions left over from an earlier use of this object.
        pid = parsed_pid;
        mem_vec.swap(parsed);
        return true;
    }

    /// Copies every described region from process `pid` into `buff`,
    /// concatenated in order, using a single process_vm_readv() call.
    ///
    /// On return `buff.size()` is the number of bytes actually transferred:
    /// the sum of all region lengths on success, fewer on a short read, and
    /// zero on failure. Failures and short reads are logged to stdout.
    ///
    /// @param buff  Destination; its previous contents are discarded.
    void CopyToMem(std::string& buff) {
        size_t sum = 0;
        for (const auto& iov : mem_vec) {
            sum += iov.iov_len;
        }

        buff.assign(sum, '\0');
        if (sum == 0) {
            // Nothing to transfer; also avoids touching an empty mem_vec.
            return;
        }

        iovec local;
        local.iov_base = &buff[0];
        local.iov_len = buff.size();

        const ssize_t nread = process_vm_readv(pid, &local, 1, mem_vec.data(), mem_vec.size(), 0);
        const int saved_errno = errno;  // the stream output below may overwrite errno
        if (nread < 0 || static_cast<size_t>(nread) != sum) {
            std::cout << getpid() << " process_vm_readv from=" << DebugString() << " sum=" << sum << " len=" << nread
                      << std::endl;
            if (nread < 0) {
                std::cout << "pid=" << pid << " " << strerror(saved_errno) << std::endl;
            }
            // Keep only the bytes that really arrived so callers do not count padding.
            buff.resize(nread > 0 ? static_cast<size_t>(nread) : 0);
        }
    }
};

class MemQueue {
    mqd_t m_mq;
    struct mq_attr m_attr;

public:
    int Init(const string& queue_name, bool read_write = false) {
        memset(&m_attr, 0, sizeof(m_attr));
        m_attr.mq_flags = 0;
        m_attr.mq_maxmsg = 10;
        m_attr.mq_msgsize = 4096;
        const int oflag = read_write ? O_WRONLY | O_CREAT : O_RDONLY;
        m_mq = mq_open(queue_name.c_str(), oflag, S_IRWXU, &m_attr);
        // m_mq = mq_open(queue_name.c_str(), oflag);
        cout << getpid() << " mq_open=" << queue_name << " oflag=" << oflag << " ret=" << m_mq << endl;
    }

    int PushMemMsg(const MemMessage& msg) {
        string buff = msg.ToString();
        return mq_send(m_mq, buff.c_str(), buff.size(), 0);
    }

    bool PopMemMsg(MemMessage& msg) {
        string buff(m_attr.mq_msgsize, '\0');
        unsigned prio = 0;
        const auto ret = mq_receive(m_mq, &buff[0], buff.size(), &prio);
        if (ret < 0) {
            return false;
        }
        buff.resize(ret);
        msg.FromString(buff.data(), buff.size());
        return true;
    }
};

// Name of the POSIX message queue shared by the producer and the consumers.
static const std::string queue_name("/test_queue");

// Values stored in ControlCenter::proc_status.
enum STATUS {
    STATUS_WAITING = 0,
    STATUS_RUNNING = 1,
    STATUS_EXIT = 2,
};

// Control record; holds one of the STATUS values.
struct ControlCenter {
    int proc_status;
};

// Points at the control record once main() has mapped it; null until then.
ControlCenter* control = nullptr;

void child_proc() {
    const auto ret = fork();
    assert(ret >= 0);
    if (ret != 0) {
        return;
    }

    while (STATUS_WAITING == control->proc_status) {
        usleep(1000);
    }

    MemQueue q;
    q.Init(queue_name);
    size_t bytes = 0;
    while (STATUS_RUNNING == control->proc_status) {
        MemMessage msg;
        if (q.PopMemMsg(msg)) {
            string buff;
            msg.CopyToMem(buff);
            bytes += buff.size();
        } else {
            usleep(1000);
        }
    }
    cout << getpid() << " bytes=" << bytes << " exit" << endl;

    exit(0);
}

int main(int argc, char* argv[]) {
    control = (ControlCenter*)mmap(NULL, sizeof(*control), PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0);
    control->proc_status = STATUS_WAITING;

    const static int proc_num = 3;
    for (int i = 0; i < proc_num; ++i) {
        child_proc();
    }

    MemQueue q;
    q.Init(queue_name, true);

    control->proc_status = STATUS_RUNNING;
    string str(1024 * 1024 * 10, 'a');
    for (int i = 0; i < 10000; ++i) {
        MemMessage msg;
        for (int t = 0; t < 3; ++t) {
            size_t base = rand() % str.size();
            size_t size = rand() % (str.size() - base);
            msg.mem_vec.emplace_back(iovec{&str[base], size});
        }
        const auto ret = q.PushMemMsg(msg);
        // cout << getpid() << " PushMemMsg=" << msg.DebugString() << " ret=" << ret << endl;

        // sleep(1);
    }

    control->proc_status = STATUS_EXIT;

    for (int i = 0; i < proc_num; ++i) {
        int status = 0;
        wait(&status);
    }

    return 0;
}