天天看点

【小笔记】大数据量excel解析工具性能对比

1. Excel存储格式及解析流程

1.1 存储格式

Excel本质上是以xml存储的,这些xml内容符合office open xml规范。把后缀名改为压缩文件的后缀名(zip,tar等等)可以看到其基本结构:

【小笔记】大数据量excel解析工具性能对比

其主要数据内容保存于sharedString.xml以及worksheets里的xml:

【小笔记】大数据量excel解析工具性能对比

实际上对于Excel的解析就是对于XML的解析,但是各个xml之间存在着关系,解析时更为复杂。

1.2 解析流程

【小笔记】大数据量excel解析工具性能对比

2. 写入性能对比

2.1 测试代码:

见文末

2.2 结果

模式 10万数据 100万数据
POI(XSSF) 10833ms GC overhead limit exceeded
POI(SXSSF) 1378ms 9274ms
EasyExcel 1339ms 9077ms

结论:

10万级别POI的SXSSF和EasyExcel的速度基本一致。

100万级别POI的XSSF模式直接无法生成,而SXSSF模式和EasyExcel的速度基本一致。

2.3 分析

POI传统模式XSSF会全部写入内存,内存占用很高,然后一次性刷盘;而SXSSF模式基于滑动窗口,部分刷盘,所以避免了大量GC时间及内存占用。

SXSSF (package: org.apache.poi.xssf.streaming) is an API-compatible streaming extension of XSSF to be used when very large spreadsheets have to be produced, and heap space is limited. SXSSF achieves its low memory footprint by limiting access to the rows that are within a sliding window, while XSSF gives access to all rows in the document. Older rows that are no longer in the window become inaccessible, as they are written to the disk.

https://poi.apache.org/components/spreadsheet/how-to.html#sxssf

所以,通过这个问题也能看出,把数据完全放在内存操作在某些场景下并不是最好的,性能反而不如多次IO操作,因为其中可能会产生FULL CG而占用大量时间。

3. 读取性能对比

3.1 测试代码

3.2 结果

数据量 POI耗时 EasyExcel耗时
10万 4223ms 2813ms

10万数据量,EasyExcel比POI快了1.4秒左右,数据量更大时差异更明显。

3.3 分析

4. 多线程解析表格

线程数 耗时
1 1807ms
2 1425ms
4 1311ms
8 2335ms
36356ms
18064ms
9082ms
4532ms
16 2662ms
32 1287ms
64 688ms
128 380ms
256 335ms
512 1253ms

5. 测试代码

import cn.hutool.core.date.StopWatch;
import cn.hutool.core.util.RandomUtil;
import com.alibaba.excel.EasyExcel;
import com.alibaba.excel.ExcelReader;
import com.alibaba.excel.ExcelWriter;
import com.alibaba.excel.context.AnalysisContext;
import com.alibaba.excel.event.AnalysisEventListener;
import com.alibaba.excel.read.metadata.ReadSheet;
import com.alibaba.excel.write.metadata.WriteSheet;
import org.apache.poi.ss.usermodel.*;
import org.apache.poi.xssf.streaming.SXSSFWorkbook;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.junit.Test;

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.*;

public class ExcelUtilTest {
    private static final String[] USERNAME = {"赵", "钱", "孙", "李", "周", "吴", "郑", "王", "冯", "陈", "褚", "卫", "蒋", "沈", "韩", "杨", "朱", "秦", "尤", "许",
            "何", "吕", "施", "张", "孔", "曹", "严", "华", "金", "魏", "陶", "姜", "戚", "谢", "邹", "喻", "柏", "水", "窦", "章", "云", "苏", "潘", "葛", "奚", "范", "彭", "郎",
            "鲁", "韦", "昌", "马", "苗", "凤", "花", "方", "俞", "任", "袁", "柳", "酆", "鲍", "史", "唐", "费", "廉", "岑", "薛", "雷", "贺", "倪", "汤", "滕", "殷",
            "罗", "毕", "郝", "邬", "安", "常", "乐", "于", "时", "傅", "皮", "卞", "齐", "康", "伍", "余", "元", "卜", "顾", "孟", "平", "黄", "和",
            "穆", "萧", "尹", "姚", "邵", "湛", "汪", "祁", "毛", "禹", "狄", "米", "贝", "明", "臧", "计", "伏", "成", "戴", "谈", "宋", "茅", "庞", "熊", "纪", "舒",
            "屈", "项", "祝", "董", "梁", "杜", "阮", "蓝", "闵", "席", "季"};
    private static final String GIRL = "秀娟英华慧巧美娜静淑惠珠翠雅芝玉萍红娥玲芬芳燕彩春菊兰凤洁梅琳素云莲真环雪荣爱妹霞香月莺媛艳瑞凡佳嘉琼勤珍贞莉桂娣叶璧璐娅琦晶妍茜秋珊莎锦黛青倩婷姣婉娴瑾颖露瑶怡婵雁蓓纨仪荷丹蓉眉君琴蕊薇菁梦岚苑婕馨瑗琰韵融园艺咏卿聪澜纯毓悦昭冰爽琬茗羽希宁欣飘育滢馥筠柔竹霭凝晓欢霄枫芸菲寒伊亚宜可姬舒影荔枝思丽 ";
    private static final String BOY = "伟刚勇毅俊峰强军平保东文辉力明永健世广志义兴良海山仁波宁贵福生龙元全国胜学祥才发武新利清飞彬富顺信子杰涛昌成康星光天达安岩中茂进林有坚和彪博诚先敬震振壮会思群豪心邦承乐绍功松善厚庆磊民友裕河哲江超浩亮政谦亨奇固之轮翰朗伯宏言若鸣朋斌梁栋维启克伦翔旭鹏泽晨辰士以建家致树炎德行时泰盛雄琛钧冠策腾楠榕风航弘";

    /**
     * 写测试
     */
    @Test
    public void test() throws IOException {
        int number = 100000;

        StopWatch sw = new StopWatch();

        sw.start();
        poiTest(number, "XSSF");
        sw.stop();
        System.out.println("POI(XSSF)写入" + number + "条数据耗时" + sw.getLastTaskTimeMillis() + "ms");

        sw.start();
        poiTest(number, "SXSSF");
        sw.stop();
        System.out.println("POI(SXSSF)写入" + number + "条数据耗时" + sw.getLastTaskTimeMillis() + "ms");

        sw.start();
        easyExcelTest(number);
        sw.stop();
        System.out.println("EasyExcel写入" + number + "条数据耗时" + sw.getLastTaskTimeMillis() + "ms");

    }

    private void poiTest(int number, String type) throws IOException {
        String path = "D:\\tmp\\test.xlsx";
        try (Workbook wb = "SXSSF".equals(type) ? new SXSSFWorkbook() : new XSSFWorkbook()) {
            Sheet sheet = wb.createSheet();
            Row row = sheet.createRow(0);
            row.createCell(0).setCellValue("ID");
            row.createCell(1).setCellValue("姓名");
            row.createCell(2).setCellValue("年龄");
            row.createCell(3).setCellValue("性别");
            row.createCell(4).setCellValue("是否会员");
            for (int i = 1; i < number; i++) {
                row = sheet.createRow(i);
                List<String> randomData = this.getRandomData();
                row.createCell(0).setCellValue(i);
                for (int col = 0; col < randomData.size(); col++) {
                    row.createCell(col + 1).setCellValue(randomData.get(col));
                }
            }
            wb.write(new FileOutputStream(path));
        }
    }

    private void easyExcelTest(int number) throws IOException {
        List<List> dataList = new LinkedList<>();
        List<String> header = new LinkedList<>();
        header.add("ID");
        header.add("姓名");
        header.add("年龄");
        header.add("性别");
        header.add("是否会员");
        dataList.add(header);
        for (int i = 1; i < number; i++) {
            List<String> randomData = this.getRandomData();
            List<String> data = new LinkedList<>();
            data.add(String.valueOf(i));
            data.addAll(randomData);
            dataList.add(data);
        }
        String path = "D:\\tmp\\test.xlsx";
        File file = new File(path);
        ExcelWriter excelWriter = EasyExcel.write(file).build();
        WriteSheet writeSheet = EasyExcel.writerSheet(0).build();
        excelWriter.write(dataList, writeSheet);
        excelWriter.finish();
    }

    private List<String> getRandomData() {
        int usernameRandom = RandomUtil.randomInt(0, USERNAME.length - 1);
        String name = USERNAME[usernameRandom];
        if (usernameRandom % 2 == 0) {
            name += GIRL.substring(usernameRandom % GIRL.length(), (usernameRandom + 2) % GIRL.length());
        } else {
            name += BOY.substring(usernameRandom % BOY.length(), (usernameRandom + 1) % BOY.length());
        }
        String age = String.valueOf(RandomUtil.randomInt(10, 50));
        String sex = usernameRandom % 2 == 0 ? "女" : "男";
        String isVip = usernameRandom % 2 == 0 ? "是" : "否";

        return Arrays.asList(name, age, sex, isVip);
    }


    /**
     * 读测试
     */
    @Test
    public void readTest() throws IOException {
        StopWatch sw = new StopWatch();
        sw.start();
        poiReadTest();
        sw.stop();
        System.out.println("POI读取数据耗时" + sw.getLastTaskTimeMillis() + "ms");

        sw.start();
        easyExcelReadTest();
        sw.stop();
        System.out.println("EasyExcel读取数据耗时" + sw.getLastTaskTimeMillis() + "ms");
    }

    private void poiReadTest() throws IOException {
        String path = "D:\\tmp\\test.xlsx";
        File file = new File(path);
        List<List<String>> result = new LinkedList<>();
        Workbook wb = WorkbookFactory.create(file);
        Sheet sheet = wb.getSheetAt(0);
        Iterator<Row> rowIterator = sheet.rowIterator();
        Row row;
        while (rowIterator.hasNext()) {
            row = rowIterator.next();

            Iterator<Cell> cellIterator = row.cellIterator();
            List<String> data = new LinkedList<>();
            while (cellIterator.hasNext()) {
                data.add(cellIterator.next().getStringCellValue());
            }
            result.add(data);
        }
        wb.close();
        System.out.println("获取到" + result.size() + "条数据");
    }

    private void easyExcelReadTest() throws IOException {
        String path = "D:\\tmp\\test.xlsx";
        File file = new File(path);
        ExcelReader excelReader = EasyExcel.read(file, new ExcelListener()).build();
        ReadSheet readSheet = new ReadSheet(0);
        excelReader.read(readSheet);
        excelReader.finish();
    }

    private class ExcelListener extends AnalysisEventListener<LinkedHashMap> {

        private List<LinkedHashMap> result = new ArrayList<>();

        @Override
        public void invoke(LinkedHashMap linkedHashMap, AnalysisContext analysisContext) {
            result.add(linkedHashMap);
        }

        @Override
        public void doAfterAllAnalysed(AnalysisContext analysisContext) {
            System.out.println("获取到" + result.size() + "条数据");
        }
    }
}

           

网络上志同道合,我们一起学习网络安全,一起进步。