天天看点

用文件映射的方式读取 txt 文件点云数据

文章目录

      • 说明
      • 代码
      • 结果对比

说明

参考博客:点云读取速度比较——QTextStream、C++文件流、C++文件映射

代码

#include <chrono>
#include <cmath>
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <iostream>
#include <sstream>
#include <string>
#include <utility>
#include <vector>

#include "windows.h"

// Location of the text point-cloud file opened by both readers below.
std::string path{"G:/Data/YYElse/yy.txt"};

int txt1(std::vector<std::vector<double>> *const pVecCloud, bool delNAN) {

    // 用于测试时间差
    auto beginTime = std::chrono::high_resolution_clock::now();

    HANDLE hSrcFile = CreateFileA(path.c_str(), GENERIC_READ, 0, NULL, OPEN_ALWAYS, 0, NULL);
    if (hSrcFile == INVALID_HANDLE_VALUE) return 0;
    LARGE_INTEGER tInt2;
    GetFileSizeEx(hSrcFile, &tInt2);
    __int64 dwRemainSize = tInt2.QuadPart;
    __int64 dwFileSize = dwRemainSize;
    HANDLE hSrcFileMapping = CreateFileMapping(hSrcFile, NULL, PAGE_READONLY, tInt2.HighPart, tInt2.LowPart, NULL);
    if (hSrcFileMapping == INVALID_HANDLE_VALUE) {
        std::cout << " > Lose ...\n";
        return 0;
    }
    SYSTEM_INFO SysInfo;
    GetSystemInfo(&SysInfo);
    DWORD dwGran = SysInfo.dwAllocationGranularity;
    const int BUFFERBLOCKSIZE = dwGran * 1024;

    // 用于标记你读取的是什么格式的点云数据
    const int XYZI_FC = 4;

    bool AlreadySetFiledCount = false;//是否已经设置了数据宽度
    int usefulFiledCount = 0; // 有效文件行数

    int totalRows = 0;  //文件总行数:
    int FieldIndex = 0; //每一个小数字的填充位置
    int FieldCount = 0; //每一行中整数字位置,用来判定数据列数究竟是XYZARGB。
    double arrXYZ_I[XYZI_FC];
    char  strLine[1024] = { 0 };

    std::cout << " > Start ...\n";
    std::vector<std::vector<double>>().swap(*pVecCloud); // 清空原始的数据
    while (dwRemainSize > 0) {
        DWORD dwBlock = dwRemainSize < BUFFERBLOCKSIZE ? dwRemainSize : BUFFERBLOCKSIZE;
        __int64 qwFileOffset = dwFileSize - dwRemainSize;
        PBYTE pSrc = (PBYTE)MapViewOfFile(hSrcFileMapping, FILE_MAP_READ, (DWORD)(qwFileOffset >> 32), (DWORD)(qwFileOffset & 0xFFFFFFFF), dwBlock);
        PBYTE pSrcBak = pSrc;
        for (int i = 0; i < dwBlock; i++) {
            //这样的处理方式有一个很大的缺点
            //当整个文件的最后一行不是空一行的话,整个数据会少一行。
            //但是一般默认情况下整个数据的最后一行是有一个换行的
            if (*pSrc == '\n') {
                //整行读完了====================================================
                if (FieldIndex != 0) { //先处理一次字段。
                    strLine[FieldIndex] = '\0';//在末尾处加上符号。
                    arrXYZ_I[FieldCount++] = atof(strLine);
                    FieldIndex = 0;
                }

                usefulFiledCount = XYZI_FC;

                std::vector<double> vTemp;
                {
                    vTemp.push_back(arrXYZ_I[0]);
                    vTemp.push_back(arrXYZ_I[1]);
                    vTemp.push_back(arrXYZ_I[2]);
                    vTemp.push_back(arrXYZ_I[3]);
                }
                (*pVecCloud).push_back(vTemp);
                totalRows++;
                FieldCount = 0;//字段位置清零
                memset(strLine, 0, sizeof(strLine));//数字字符数组清空
            } else if ((*pSrc >= '0' && *pSrc <= '9') || *pSrc == '.' || *pSrc == '-' || *pSrc == 'e' || *pSrc == '+') {
                // 若果以以上内容结尾,则跳过该行
                strLine[FieldIndex++] = *pSrc;
            } else {
                //此时为行内分割===关键是连续几次无用字符==============================
                if (FieldIndex != 0) {
                    //一个字段处理完毕
                    strLine[FieldIndex] = '\0';
                    arrXYZ_I[FieldCount++] = atof(strLine);
                    FieldIndex = 0;
                }
            }
            pSrc++;
        }
        UnmapViewOfFile(pSrcBak);
        dwRemainSize -= dwBlock;
    }
    CloseHandle(hSrcFileMapping);
    CloseHandle(hSrcFile);

    auto endTime = std::chrono::high_resolution_clock::now();
    auto elapsedTime = std::chrono::duration_cast<std::chrono::seconds>(endTime - beginTime);
    std::cout << "time cost:" << elapsedTime.count() << std::endl;
    std::cout << " > End ...\n";
    return totalRows;
}

/**
 * Reads the point cloud at the global `path` line by line with std::ifstream.
 *
 * Each line is split on whitespace (spaces, tabs, a trailing '\r') and the first four
 * tokens are parsed as X, Y, Z and intensity; further columns are ignored. Parsed rows
 * are appended to *pVecCloud (existing contents are kept). Blank lines are skipped.
 *
 * @param pVecCloud  Output container that receives one 4-element vector per point.
 * @param delNAN     When true, a row whose first four columns contain a NaN value is
 *                   skipped; when false the NaN is stored like any other value.
 * @return Number of points appended; -200 if the file cannot be opened; -1 if a row
 *         contains a token that does not start with a number or has fewer than four
 *         columns (e.g. no intensity).
 */
int txt2(std::vector<std::vector<double>> *const pVecCloud, bool delNAN) {
    std::ifstream ifs(path, std::ios::in);
    if (!ifs) {
        return -200;
    }

    // Start of the timed section.
    auto beginTime = std::chrono::high_resolution_clock::now();

    // Only X, Y, Z and intensity are read from each row.
    const std::vector<double>::size_type kColumns = 4;

    int _pointCount = 0;
    std::string lineStr;
    std::string token;
    while (std::getline(ifs, lineStr)) {
        std::istringstream ss(lineStr);
        std::vector<double> lineArray;
        lineArray.reserve(kColumns);

        bool sawToken = false;  // false for blank / whitespace-only lines
        bool keepRow = true;    // cleared when delNAN rejects the row
        while (lineArray.size() < kColumns && (ss >> token)) {
            sawToken = true;

            char *parseEnd = nullptr;
            const double value = std::strtod(token.c_str(), &parseEnd);
            if (parseEnd == token.c_str()) {
                return -1;      // token does not start with a number
            }
            if (delNAN && std::isnan(value)) {
                keepRow = false;
                break;
            }
            lineArray.push_back(value);
        }

        if (!sawToken) continue;   // blank line: nothing to store, not an error
        if (!keepRow) continue;    // NaN row dropped on request; it must not trip the column check
        if (lineArray.size() < kColumns) return -1; // row lacks the intensity column

        pVecCloud->push_back(std::move(lineArray));
        _pointCount++;
    }

    auto endTime = std::chrono::high_resolution_clock::now();
    auto elapsedTime = std::chrono::duration_cast<std::chrono::seconds>(endTime - beginTime);
    std::cout << "time cost:" << elapsedTime.count() << std::endl;

    return _pointCount;
}


// Runs both readers on the same file and prints how many points each one produced.
int main() {
    using Cloud = std::vector<std::vector<double>>;

    // Pass 1: file-mapping reader.
    Cloud mappedCloud;
    txt1(&mappedCloud, true);
    std::cout << mappedCloud.size() << std::endl;

    std::cout << "\n--------------------------\n\n";

    // Pass 2: std::ifstream reader.
    Cloud streamedCloud;
    txt2(&streamedCloud, true);
    std::cout << streamedCloud.size() << std::endl;

    return 0;
}
           

结果对比

实测 2000万+ 的点云数据

文件映射方式:43s

文件流读取:59s