Skip to content
Advertisement

Convert Excel file to array while preseerving formatting when using PHPSpreadsheet

Whatever I do, my array does not keep cell value formatting when I convert data to array.

How to convert formatting to HTML preferably?

<?php
namespace AppService;

use PhpOfficePhpSpreadsheetReaderXls;
use PhpOfficePhpSpreadsheetReaderXlsx;
use SymfonyComponentHttpFoundationFileUploadedFile;
use PhpOfficePhpSpreadsheetIOFactory as ExcelFactory;

class ExcelService
{
    public function getArray(UploadedFile $uploadedFile): array
    {
        $path = sprintf('%s/%s', $uploadedFile->getPath(), $uploadedFile->getFilename());

        $reader = ExcelFactory::createReaderForFile($path);

        // assure that we are reading styling as well as data
        $reader->setReadDataOnly(false);

        $spreadsheet = $reader->load($path);
        $sheet = $spreadsheet->getActiveSheet();

        $maxCol = $sheet->getHighestDataColumn();
        $maxRow = $sheet->getHighestDataRow();

        $colAlpha = range('A', $maxCol);
        $colRange = count($colAlpha);

        $array = [];

        for ($indexRow = 0; $indexRow <= $maxRow; $indexRow++) {
            for ($indexCol = 0; $indexCol < $colRange; $indexCol++) {
                $cellCoords = sprintf('%s%d', $colAlpha[$indexCol], $indexRow + 1);
                $cellObject = $sheet->getCell($cellCoords);
                $cellValue = trim((string) $cellObject->getValue());

                $array[$indexRow][$indexCol] = $cellValue;
            }
        }

        return $array;
    }
}

Advertisement

Answer

Looks like converting spreadsheet to HTML file, and reading that through DOMDocument parser work for me for now.

<?php
namespace AppService;

use PhpOfficePhpSpreadsheetReaderXls;
use PhpOfficePhpSpreadsheetReaderXlsx;
use PhpOfficePhpSpreadsheetSpreadsheet;
use PhpOfficePhpSpreadsheetWriterHtml;
use SymfonyComponentHttpFoundationFileUploadedFile;
use PhpOfficePhpSpreadsheetIOFactory as ExcelFactory;
use DOMDocument;
use DOMXPath;

class ExcelService
{
    public function getArray(UploadedFile $uploadedFile): array
    {
        $path = sprintf('%s/%s', $uploadedFile->getPath(), $uploadedFile->getFilename());

        $excelReader = ExcelFactory::createReaderForFile($path);
        $excelSpreadSheet = $excelReader->load($path);

        // save spreadsheet as HTML file as we cannot access cell value formatting directly as HTML
        $htmlPath = tempnam(sys_get_temp_dir(), 'temp_html');
        $htmlWriter = new Html($excelSpreadSheet);
        $htmlWriter->save($htmlPath);

        return $this->getFileData($htmlPath);
    }

    private function getFileData(string $path): array
    {
        $data = [];
        $contents = file_get_contents($path);

        $dom = new DOMDocument();
        $dom->loadHTML($contents);

        $xpath = new DOMXPath($dom);
        $tables = $xpath->query('//table');

        foreach ($tables as $indexTable => $table) {
            $rows = $xpath->query(sprintf('%s//tr', $table->getNodePath()));

            foreach ($rows as $indexRow => $row) {
                $columns = $xpath->query(sprintf('%s/td', $row->getNodePath()));

                foreach ($columns as $indexColumn => $column) {
                    $doc = $column->ownerDocument;
                    $value = '';

                    foreach ($column->childNodes as $n) {
                        $value .= $doc->saveHTML($n);
                    }

                    $data[$indexTable][$indexRow][$indexColumn] = $value;
                }
            }
        }

        return $data;
    }
}
User contributions licensed under: CC BY-SA
1 People found this is helpful
Advertisement