Last active
November 17, 2023 11:27
-
-
Save ismail1432/50c3f94f5cb4db92068c6f38d0d1fbed to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
declare(strict_types=1); | |
namespace Infrastructure\Symfony\Mime; | |
use Symfony\Component\Mime\FileBinaryMimeTypeGuesser; | |
use Symfony\Component\Mime\MimeTypeGuesserInterface; | |
/** | |
* Proxy to fix weird mimetype guessing for docx file. | |
*/ | |
/**
 * Decorates Symfony's FileBinaryMimeTypeGuesser to fix the MIME type guessed for docx files.
 *
 * The `file` binary sometimes reports a docx as "application/octet-stream" or
 * "application/zip". When that happens, this proxy opens the file as a zip archive
 * and looks for the WordprocessingML main part declaration in "[Content_Types].xml".
 * Every other MIME type is returned untouched.
 */
final class FileBinaryMimeTypeGuesserProxy implements MimeTypeGuesserInterface
{
    private const DOCX_MIMETYPE = 'application/vnd.openxmlformats-officedocument.wordprocessingml.document';
    private const OCTET_STREAM_MIMETYPE = 'application/octet-stream';
    private const ZIP_MIMETYPE = 'application/zip';
    private const MAIN_PART_CONTENT_TYPE = '[Content_Types].xml';

    /**
     * Matches the <Override> entry declaring the main document part of a docx.
     *
     * The main part is usually "word/document.xml", but some producers (e.g. Word Online)
     * name it "word/document22.xml" or similar, hence the lazy wildcard.
     *
     * @see http://officeopenxml.com/anatomyofOOXML.php
     * @see https://github.com/PHPOffice/PHPWord/blob/be0190cd5d8f95b4be08d5853b107aa4e352759a/src/PhpWord/TemplateProcessor.php#L1023
     */
    private const DOCX_MAIN_PART_PATTERN = '~PartName="\/(word\/document.*?\.xml)" ContentType="application\/vnd\.openxmlformats-officedocument\.wordprocessingml\.document\.main\+xml"~';

    /** The guesser being decorated. */
    private FileBinaryMimeTypeGuesser $fileBinaryMimeTypeGuesser;

    public function __construct()
    {
        $this->fileBinaryMimeTypeGuesser = new FileBinaryMimeTypeGuesser();
    }

    /**
     * Supported whenever the decorated guesser is supported.
     *
     * The zip inspection is an optional refinement: without ext-zip the proxy
     * simply returns what the decorated guesser reports.
     */
    public function isGuesserSupported(): bool
    {
        return $this->fileBinaryMimeTypeGuesser->isGuesserSupported();
    }

    /**
     * Guesses the MIME type of the file at $path.
     *
     * If the decorated guesser answers "application/octet-stream" or "application/zip",
     * the file is inspected as a zip archive to find out whether it is actually a docx.
     * Any failure during that inspection falls back to the originally guessed type.
     *
     * @param string $path Path of the file to inspect
     *
     * @return string|null The docx MIME type when detected, otherwise whatever the decorated guesser returned
     */
    public function guessMimeType(string $path): ?string
    {
        $mimeType = $this->fileBinaryMimeTypeGuesser->guessMimeType($path);

        // Early return: only the two ambiguous types can hide a docx.
        if ($this->cannotBeAnUnguessedDocx($mimeType)) {
            return $mimeType;
        }

        // Without ext-zip we cannot look inside the file; `new \ZipArchive()` would be a fatal Error.
        if (!class_exists(\ZipArchive::class)) {
            return $mimeType;
        }

        $zip = new \ZipArchive();

        // open() returns true on success and an error code otherwise. It must be checked:
        // plenty of "application/octet-stream" files are not zip archives, and on PHP 8
        // calling methods on an unopened archive throws a ValueError (not an \Exception).
        if (true !== $zip->open($path)) {
            return $mimeType;
        }

        try {
            $contentTypes = $zip->getFromName(self::MAIN_PART_CONTENT_TYPE);
        } catch (\Exception $e) {
            // Best effort: keep the original guess if the archive cannot be read.
            return $mimeType;
        } finally {
            // Always release the file handle, whatever the outcome.
            $zip->close();
        }

        // A zip without "[Content_Types].xml" is not an Office Open XML package.
        if (false === $contentTypes) {
            return $mimeType;
        }

        return 1 === preg_match(self::DOCX_MAIN_PART_PATTERN, $contentTypes)
            ? self::DOCX_MIMETYPE
            : $mimeType;
    }

    /**
     * Tells whether the guessed MIME type rules out a mis-guessed docx.
     *
     * Only "application/octet-stream" and "application/zip" can be a docx in disguise.
     */
    private function cannotBeAnUnguessedDocx(?string $mimeType): bool
    {
        return self::OCTET_STREAM_MIMETYPE !== $mimeType
            && self::ZIP_MIMETYPE !== $mimeType;
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment