nas_docker_compose/kodbox/site/app/kod/FileParsePdf.class.php

<?php
/*
* @link http://kodcloud.com/
* @author warlee | e-mail:kodcloud@qq.com
* @copyright warlee 2014.(Shanghai)Co.,Ltd
* @license http://kodcloud.com/tools/license/license.txt
*/


/**
 * 解析获取pdf文件信息;
 * 
 * pdfparser https://www.pdfparser.org/documentation
 * mpdf编辑: http://mpdf.github.io/
 */
class FileParsePdf{
	public static function parse($filePath){
		$chunkSize	= 32 * 1024;//trailer处理;
		$fileInfo   = array(
			'fp'		=> fopen($filePath,'r'),
			'path'		=> $filePath,
			'size'		=> filesize_64($filePath),
			'chunkSize' => $chunkSize,
		);
		$fileInfo['dataStart'] = StreamWrapperIO::read($filePath,0,$chunkSize);
		$fileInfo['dataEnd']   = StreamWrapperIO::read($filePath,$fileInfo['size'] - $chunkSize,$chunkSize);
		// if($_GET['debug'] == '1'){
		// 	include('/Library/WebServer/Documents/localhost/test/000/test/pdfparser-0.18.1/vendor/autoload.php');
		// 	$parser = new \Smalot\PdfParser\Parser();
		// 	$pdf = $parser->parseFile($filePath);pr($pdf->getDetails());exit;
		// }
		
		$xref = self::decodeXref($fileInfo);
		if($xref){
			$infoKey  = $xref['trailer']['info'];
			$dataInfo = self::getObjectValue($fileInfo,$xref,$infoKey);
		}
		
		$dataInfo = is_array($dataInfo) ? $dataInfo : array();
		// 页面尺寸处理;
		$dataInfo['sizeWidth'] = 0;		
		$theReg = '/[\s]*\/MediaBox[\s]*\[[\s]*([0-9\.]+)[\s]+([0-9\.]+)[\s]+([0-9\.]+)[\s]+([0-9\.]+)[\s]*\]/i';
		preg_match($theReg,$fileInfo['dataStart'],$matches);
		if (!$dataInfo['sizeWidth'] && count($matches) == 5){
			$dataInfo['sizeWidth']  = $matches[3];
			$dataInfo['sizeHeight'] = $matches[4];
		}
		preg_match($theReg,$fileInfo['dataEnd'],$matches);
		if (!$dataInfo['sizeWidth'] && count($matches) == 5){
			$dataInfo['sizeWidth']  = $matches[3];
			$dataInfo['sizeHeight'] = $matches[4];
		}
		preg_match('/%PDF-([0-9\.]+)/',$fileInfo['dataStart'],$matches);
		if($matches){$dataInfo['version'] = $matches[1];}

		// // 页数计算处理; /Count 8
		$dataInfo['pageNumber'] = 0;
		$theReg = "/[\s]*\/Count[\s]+([0-9]+)[\s]*/i";
		
		preg_match_all($theReg,$fileInfo['dataStart'],$matches);
		if($matches[1] && $dataInfo['pageNumber'] < $matches[1][0]){
			$dataInfo['pageNumber'] = $matches[1][0];
		}		
		preg_match_all($theReg,$fileInfo['dataEnd'],$matches);
		if($matches[1] && $dataInfo['pageNumber'] < $matches[1][0]){
			$dataInfo['pageNumber'] = $matches[1][0];
		}
		
		$dataInfo = self::parseInfoItem($dataInfo);
		return $dataInfo;
	}	
	private static function parseInfoItem($dataInfo){
		if(!$dataInfo) return false;
		$picker = array( //数值统一筛选并处理;
			'title' 		 => array('Title',''),				// 标题
			'auther'	 	 => array('Author',''),				// 作者
			'createTime'	 => array('CreationDate','date'),	// 创建日期
			'modifyTime' 	 =>	array('ModDate','date'),		// 修改日期
			'pageNumber'	 => array('pageNumber','int'),		// 页数
			'sizeWidth'		 => array('sizeWidth','int'),		// 页面宽度
			'sizeHeight'	 => array('sizeHeight','int'),		// 页面高度
			'creator'	 	 => array('Creator',''),			// 内容创作者
			'producer'	 	 => array('Producer',''),			// 编码软件
			'pdfVersion'	 => array('version',''),			// PDF 版本;
		);
		
		$result = array();
		foreach ($picker as $key => $info){
			if(!isset($dataInfo[$info[0]])) continue;
			$value = $dataInfo[$info[0]];
			if(!$value || is_array($value)) continue;
			
			switch($info[1]){
				case 'int' :$value = intval($value);break;
				case 'date':
					if(substr($value,0,2) == 'D:') {
						$value = substr($value,2,14);
					}
					if(strtotime($value)){
						$value = date('Y-m-d H:i:s',strtotime($value));
					}
					break;
			}
			$result[$key] = $value;
		}
		// pr($result,$dataInfo);exit;
		return $result;
	}
	
	private static function decodeXref(&$fileInfo){
		$pdfData = $fileInfo['dataEnd'];
		$xref   = array('trailer'=>array(),'xref'=>array());
		$theReg = '/[\r\n]startxref[\s]*[\r\n]*([0-9]+)[\s]*[\r\n]+%%EOF/i';
		if(!preg_match_all($theReg,$pdfData, $matches,PREG_SET_ORDER,0)) return false;

		// 结尾block索引开始位置,比最小block小则加大;
		$startxref = intval($matches[0][1]);
		if($fileInfo['size'] - $startxref > $fileInfo['chunkSize']){
			$chunkSize = 4 * $fileInfo['chunkSize'];
			$fileInfo['chunkSize'] = $chunkSize;
			$fileInfo['dataStart'] = StreamWrapperIO::read($fileInfo['path'],0,$chunkSize);
			$fileInfo['dataEnd']   = StreamWrapperIO::read($fileInfo['path'],$fileInfo['size'] - $chunkSize,$chunkSize);
			$pdfData = $fileInfo['dataEnd'];
		}

		$objNum = 0;
		// preg_match_all('/([0-9]+)[\x20]([0-9]+)[\x20]?([nf]?)(\r\n|[\x20]?[\r\n])/',$pdfData,$matches);
		preg_match_all('/([0-9]+)[\x20]([0-9]+)[\x20]?([nf]?)(\r\n|[\x20]?[\r\n])/',$pdfData,$matches);
		foreach ($matches[3] as $i=>$item){
			if($matches[3][$i] == 'n'){
				$index = $objNum.'_'.intval($matches[2][$i]);
				$xref['xref'][$index] = intval($matches[1][$i]);
                ++$objNum;
            }else if ($matches[3][$i] == 'f') {
                ++$objNum;
            } else {
				// $objNum = intval($matches[1][$i]); //从1开始;
            }
		}
		
		// 没有索引的情况处理; 优先按照的的进行匹配;
		if(preg_match_all('/[\r\n]([0-9]+)[\s]+([0-9]+)[\s]+obj/iU',$pdfData, $matches)){
			$fileOffset = $fileInfo['size'] - $fileInfo['chunkSize'];
			foreach ($matches[0] as $i => $theValue) {
				$key   = $matches[1][$i].'_'.$matches[2][$i];
				$xref['xref'][$key] = strpos($pdfData,$theValue) + $fileOffset + 1;
			}
		}

		if(preg_match_all('/trailer[\s]*<<(.*)>>/isU',$pdfData,$matches)){
			$trailerData = count($matches[1]) == 1 ? $matches[1][0] : $matches[1][1];
		}else{// 兼容没有trailer情况的数据; 直接从文件最后正则匹配查找;
			$trailerData = substr($pdfData, -1024*5);
		}
		if (preg_match('/Size[\s]+([0-9]+)/i', $trailerData, $matches) > 0) {
			$xref['trailer']['size'] = intval($matches[1]);
		}
		if (preg_match('/Root[\s]+([0-9]+)[\s]+([0-9]+)[\s]+R/i', $trailerData, $matches) > 0) {
			$xref['trailer']['root'] = intval($matches[1]).'_'.intval($matches[2]);
		}
		if (preg_match('/Encrypt[\s]+([0-9]+)[\s]+([0-9]+)[\s]+R/i', $trailerData, $matches) > 0) {
			$xref['trailer']['encrypt'] = intval($matches[1]).'_'.intval($matches[2]);
		}
		if (preg_match('/Info[\s]+([0-9]+)[\s]+([0-9]+)[\s]+R/i', $trailerData, $matches) > 0) {
			$xref['trailer']['info'] = intval($matches[1]).'_'.intval($matches[2]);
		}
		if (preg_match('/ID[\s]*[\[][\s]*[<]([^>]*)[>][\s]*[<]([^>]*)[>]/i', $trailerData, $matches) > 0) {
			$xref['trailer']['id'] = array();
			$xref['trailer']['id'][0] = $matches[1];
			$xref['trailer']['id'][1] = $matches[2];
		}
		if(!$xref['trailer']['info']) return false;
		if (preg_match('/Prev[\s]+([0-9]+)/i', $trailerData, $matches) > 0) {
			// $xref = self::decodeXref($pdfData,intval($matches[1]), $xref);
		}
		return $xref;
	}
	
	private static function getObjectValue($fileInfo,$xref,$infoKey){
		$dataInfoRef  = self::getObject($fileInfo,$xref['xref'][$infoKey]);
		// pr($infoKey,$dataInfoRef);
		
		if(is_string($dataInfoRef[1])) return $dataInfoRef[1];
		if(!is_array($dataInfoRef[1])) return array();
		$dataInfo = array();
		for ($i = 0; $i< count($dataInfoRef[1]);$i+=2){
			$itemKey 	= $dataInfoRef[1][$i];
			$itemValue 	= $dataInfoRef[1][$i+1];
			if(count($itemKey) == 3 && $itemKey[0] == '/'){
				$value = false;
				if($itemValue[0] == 'objref'){
					$itemValue = self::getObject($fileInfo,$xref['xref'][$itemValue[1]]);
				}
				$value = $itemValue[1];			
				if($value === false) continue;
				if(is_string($value)){
					$value = self::decodeStr($value);
				}
				$dataInfo[$itemKey[1]] = $value;
			}
		}
		return $dataInfo;
	}
	private static function getObject($fileInfo,$offset){
		$dataIndex = self::getObjectItem($fileInfo,$offset);
		$dataIndex = self::getObjectItem($fileInfo,$dataIndex[2]);
		return $dataIndex;
	}
	private static function getObjectItem($fileInfo,$offset){
		// return self::getRawObject($fileInfo['dataEnd'],$offset);
		$chunkSize  = $fileInfo['chunkSize'];
		$fileOffset = $fileInfo['size'] - $chunkSize;		
		$thePose = $offset >= $fileOffset ? ($offset - $fileOffset) : $offset;
		$theData = $offset >= $fileOffset ? $fileInfo['dataEnd']: $fileInfo['dataStart'];
		if($offset > $chunkSize && $offset <= $fileOffset ){
			$thePose = 0;
			$theData = StreamWrapperIO::read($fileInfo['path'],$offset,$chunkSize);
			// pr("getFile:$offset;$chunkSize",substr($theData,200));
		}
		$dataIndex = self::getRawObject($theData,$thePose);
		
		// 重置offset;
		if($offset >= $fileOffset){
			$dataIndex[2] = $dataIndex[2] + $fileOffset;
		}else if($offset > $chunkSize && $offset <= $fileOffset){
			$dataIndex[2] = $dataIndex[2] + $offset;
		}
		// pr('getObjectItem:',[$offset,$chunkSize,$fileOffset,$thePose],$dataIndex);
		return $dataIndex;
	}
	
	// 解析节点数据;(xxx xx obj)
	private static function decodeStr($text){
		$text = str_replace(
			['\\\\', '\\ ', '\\/', '\(', '\)', '\n', '\r', '\t'],
			['\\',   ' ',   '/',   '(',  ')',  "\n", "\r", "\t"],$text);
			
		$parts = preg_split('/(\\\\\d{3})/s', $text, -1, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_DELIM_CAPTURE);
        $text = '';
        foreach ($parts as $part) {
            if (preg_match('/^\\\\\d{3}$/', $part)) {
                $text .= \chr(octdec(trim($part, '\\')));
            } else {
                $text .= $part;
            }
		}
		$parts = preg_split('/(#\d{2})/s', $text, -1, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_DELIM_CAPTURE);
        $text = '';
        foreach ($parts as $part) {
            if (preg_match('/^#\d{2}$/', $part)) {
                $text .= \chr(hexdec(trim($part, '#')));
            } else {
                $text .= $part;
            }
		}
		
		$parts = preg_split('/(<[a-f0-9]+>)/si', $text, -1, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_DELIM_CAPTURE);
		$text = '';
        foreach ($parts as $part) {
            if (preg_match('/^<.*>$/s', $part) && false === stripos($part, '<?xml')) {
                $part = preg_replace("/[\r\n]/", '', $part);
                $part = trim($part, '<>');
                $part = pack('H*', $part);
                $text .= $part;
            } else {
                $text .= $part;
            }
		}
		if(preg_match('/^\xFE\xFF/i', $text)) {
            // Strip U+FEFF byte order marker.
            $decode = substr($text, 2);
            $text = '';
            $length = strlen($decode);
            for ($i = 0; $i < $length; $i += 2) {
				$hex   = hexdec(bin2hex(substr($decode, $i, 2)));
				$text .= mb_convert_encoding('&#'.intval($hex).';', 'UTF-8', 'HTML-ENTITIES');
            }
		}
		return $text;
	}
	private static function getRawObject($pdfData, $offset = 0){
        $objtype = ''; // object type to be returned
        $objval = ''; // object value to be returned
        /*
         * skip initial white space chars:
         *      \x00 null (NUL)
         *      \x09 horizontal tab (HT)
         *      \x0A line feed (LF)
         *      \x0C form feed (FF)
         *      \x0D carriage return (CR)
         *      \x20 space (SP)
         */
        $offset += strspn($pdfData, "\x00\x09\x0a\x0c\x0d\x20", $offset);
		$char = $pdfData[$offset];
		// echo "<pre>";var_dump($char,'pos='.$offset.';len='.strlen($pdfData),substr($pdfData,$offset,33));echo "</pre>";
		
        switch ($char) {
            case '%':  // \x25 PERCENT SIGN
                    // skip comment and search for next token
                    $next = strcspn($pdfData, "\r\n", $offset);
                    if ($next > 0) {
                        $offset += $next;
                        return self::getRawObject($pdfData, $offset);
                    }
                    break;
            case '/':  // \x2F SOLIDUS
                    $objtype = $char;
                    ++$offset;
                    $pregResult = preg_match(
                        '/^([^\x00\x09\x0a\x0c\x0d\x20\s\x28\x29\x3c\x3e\x5b\x5d\x7b\x7d\x2f\x25]+)/',
                        substr($pdfData, $offset, 256),
                        $matches
                    );
                    if (1 == $pregResult) {
                        $objval = $matches[1]; // unescaped value
                        $offset += strlen($objval);
                    }
                    break;
            case '(':   // \x28 LEFT PARENTHESIS
            case ')':  // \x29 RIGHT PARENTHESIS
                    // literal string object
                    $objtype = $char;
                    ++$offset;
                    $strpos = $offset;
                    if ('(' == $char) {
                        $open_bracket = 1;
                        while ($open_bracket > 0) {
							if (!isset($pdfData[$strpos])) break;
                            $ch = $pdfData[$strpos];
                            switch ($ch) {
                                case '\\':  // REVERSE SOLIDUS (5Ch) (Backslash)
									// skip next character
									++$strpos;
									break;
                                case '(':  // LEFT PARENHESIS (28h)
									++$open_bracket;
									break;
                                case ')':  // RIGHT PARENTHESIS (29h)
									--$open_bracket;
									break;
                            }
                            ++$strpos;
                        }
                        $objval = substr($pdfData, $offset, ($strpos - $offset - 1));
                        $offset = $strpos;
                    }
                    break;
            case '[':   // \x5B LEFT SQUARE BRACKET
            case ']':  // \x5D RIGHT SQUARE BRACKET
                // array object
                $objtype = $char;
                ++$offset;
                if ('[' == $char) {
                    // get array content
                    $objval = array();
                    do {
                        $oldOffset = $offset;
                        // get element
                        $element = self::getRawObject($pdfData, $offset);
                        $offset = $element[2];
                        $objval[] = $element;
                    } while ((']' != $element[0]) && ($offset != $oldOffset));
                    // remove closing delimiter
                    array_pop($objval);
                }
                break;
            case '<':  // \x3C LESS-THAN SIGN
            case '>':  // \x3E GREATER-THAN SIGN
                if (isset($pdfData[($offset + 1)]) && ($pdfData[($offset + 1)] == $char)) {
                    // dictionary object
                    $objtype = $char.$char;
                    $offset += 2;
                    if ('<' == $char) {
                        $objval = array();
                        do {
                            $oldOffset = $offset;
                            // get element
                            $element = self::getRawObject($pdfData, $offset);
                            $offset = $element[2];
                            $objval[] = $element;
                        } while (('>>' != $element[0]) && ($offset != $oldOffset));
                        // remove closing delimiter
                        array_pop($objval);
                    }
                } else {
                    // hexadecimal string object
                    $objtype = $char;
                    ++$offset;
                    $pregResult = preg_match('/^([0-9A-Fa-f\x09\x0a\x0c\x0d\x20]+)>/iU',substr($pdfData, $offset),$matches);
                    if (('<' == $char) && 1 == $pregResult) {
                        // remove white space characters
                        $objval = strtr($matches[1], "\x09\x0a\x0c\x0d\x20", '');
                        $offset += \strlen($matches[0]);
                    } elseif (false !== ($endpos = strpos($pdfData, '>', $offset))) {
                        $offset = $endpos + 1;
                    }
                }
                break;
            default:
				if ('endobj' == substr($pdfData, $offset, 6)) {
					// indirect object
					$objtype = 'endobj';
					$offset += 6;
				} elseif ('null' == substr($pdfData, $offset, 4)) {
					// null object
					$objtype = 'null';
					$offset += 4;
					$objval = 'null';
				} elseif ('true' == substr($pdfData, $offset, 4)) {
					// boolean true object
					$objtype = 'boolean';
					$offset += 4;
					$objval = 'true';
				} elseif ('false' == substr($pdfData, $offset, 5)) {
					// boolean false object
					$objtype = 'boolean';
					$offset += 5;
					$objval = 'false';
				} elseif ('stream' == substr($pdfData, $offset, 6)) {
					// start stream object
					$objtype = 'stream';
					$offset += 6;
					if (1 == preg_match('/^([\r]?[\n])/isU', substr($pdfData, $offset), $matches)) {
						$offset += strlen($matches[0]);
						$endStreamReg = '/(endstream)[\x09\x0a\x0c\x0d\x20]/isU';
						$pregResult = preg_match($endStreamReg,substr($pdfData, $offset),$matches,PREG_OFFSET_CAPTURE);
						if (1 == $pregResult) {
							$objval = substr($pdfData, $offset, $matches[0][1]);
							$offset += $matches[1][1];
						}
					}
				} elseif ('endstream' == substr($pdfData, $offset, 9)) {
					// end stream object
					$objtype = 'endstream';
					$offset += 9;
				} elseif (1 == preg_match('/^([0-9]+)[\s]+([0-9]+)[\s]+R/iU', substr($pdfData, $offset, 33), $matches)) {
					$objtype = 'objref';
					$offset += strlen($matches[0]);
					$objval = intval($matches[1]).'_'.intval($matches[2]);
				} elseif (1 == preg_match('/^([0-9]+)[\s]+([0-9]+)[\s]+obj/iU', substr($pdfData, $offset, 33), $matches)) {
					$objtype = 'obj';
					$objval = intval($matches[1]).'_'.intval($matches[2]);
					$offset += strlen($matches[0]);
				} elseif (($numlen = strspn($pdfData, '+-.0123456789', $offset)) > 0) {
					$objtype = 'numeric';
					$objval = substr($pdfData, $offset, $numlen);
					$offset += $numlen;
				}
				break;
        }
        return array($objtype, $objval, $offset);
    }
}
update 2024-08-31 01:03:37 +08:00			`<?php`
			`/*`
			`* @link http://kodcloud.com/`
			`* @author warlee \| e-mail:kodcloud@qq.com`
			`* @copyright warlee 2014.(Shanghai)Co.,Ltd`
			`* @license http://kodcloud.com/tools/license/license.txt`
			`*/`


			`/**`
			`* 解析获取pdf文件信息;`
			`*`
			`* pdfparser https://www.pdfparser.org/documentation`
			`* mpdf编辑: http://mpdf.github.io/`
			`*/`
			`class FileParsePdf{`
			`public static function parse($filePath){`
			`$chunkSize = 32 * 1024;//trailer处理;`
			`$fileInfo = array(`
			`'fp' => fopen($filePath,'r'),`
			`'path' => $filePath,`
			`'size' => filesize_64($filePath),`
			`'chunkSize' => $chunkSize,`
			`);`
			`$fileInfo['dataStart'] = StreamWrapperIO::read($filePath,0,$chunkSize);`
			`$fileInfo['dataEnd'] = StreamWrapperIO::read($filePath,$fileInfo['size'] - $chunkSize,$chunkSize);`
			`// if($_GET['debug'] == '1'){`
			`// include('/Library/WebServer/Documents/localhost/test/000/test/pdfparser-0.18.1/vendor/autoload.php');`
			`// $parser = new \Smalot\PdfParser\Parser();`
			`// $pdf = $parser->parseFile($filePath);pr($pdf->getDetails());exit;`
			`// }`

			`$xref = self::decodeXref($fileInfo);`
			`if($xref){`
			`$infoKey = $xref['trailer']['info'];`
			`$dataInfo = self::getObjectValue($fileInfo,$xref,$infoKey);`
			`}`

			`$dataInfo = is_array($dataInfo) ? $dataInfo : array();`
			`// 页面尺寸处理;`
			`$dataInfo['sizeWidth'] = 0;`
			`$theReg = '/[\s]\/MediaBox[\s]\[[\s]([0-9\.]+)[\s]+([0-9\.]+)[\s]+([0-9\.]+)[\s]+([0-9\.]+)[\s]\]/i';`
			`preg_match($theReg,$fileInfo['dataStart'],$matches);`
			`if (!$dataInfo['sizeWidth'] && count($matches) == 5){`
			`$dataInfo['sizeWidth'] = $matches[3];`
			`$dataInfo['sizeHeight'] = $matches[4];`
			`}`
			`preg_match($theReg,$fileInfo['dataEnd'],$matches);`
			`if (!$dataInfo['sizeWidth'] && count($matches) == 5){`
			`$dataInfo['sizeWidth'] = $matches[3];`
			`$dataInfo['sizeHeight'] = $matches[4];`
			`}`
			`preg_match('/%PDF-([0-9\.]+)/',$fileInfo['dataStart'],$matches);`
			`if($matches){$dataInfo['version'] = $matches[1];}`

			`// // 页数计算处理; /Count 8`
			`$dataInfo['pageNumber'] = 0;`
			`$theReg = "/[\s]\/Count[\s]+([0-9]+)[\s]/i";`

			`preg_match_all($theReg,$fileInfo['dataStart'],$matches);`
			`if($matches[1] && $dataInfo['pageNumber'] < $matches[1][0]){`
			`$dataInfo['pageNumber'] = $matches[1][0];`
			`}`
			`preg_match_all($theReg,$fileInfo['dataEnd'],$matches);`
			`if($matches[1] && $dataInfo['pageNumber'] < $matches[1][0]){`
			`$dataInfo['pageNumber'] = $matches[1][0];`
			`}`

			`$dataInfo = self::parseInfoItem($dataInfo);`
			`return $dataInfo;`
			`}`
			`private static function parseInfoItem($dataInfo){`
			`if(!$dataInfo) return false;`
			`$picker = array( //数值统一筛选并处理;`
			`'title' => array('Title',''), // 标题`
			`'auther' => array('Author',''), // 作者`
			`'createTime' => array('CreationDate','date'), // 创建日期`
			`'modifyTime' => array('ModDate','date'), // 修改日期`
			`'pageNumber' => array('pageNumber','int'), // 页数`
			`'sizeWidth' => array('sizeWidth','int'), // 页面宽度`
			`'sizeHeight' => array('sizeHeight','int'), // 页面高度`
			`'creator' => array('Creator',''), // 内容创作者`
			`'producer' => array('Producer',''), // 编码软件`
			`'pdfVersion' => array('version',''), // PDF 版本;`
			`);`

			`$result = array();`
			`foreach ($picker as $key => $info){`
			`if(!isset($dataInfo[$info[0]])) continue;`
			`$value = $dataInfo[$info[0]];`
			`if(!$value \|\| is_array($value)) continue;`

			`switch($info[1]){`
			`case 'int' :$value = intval($value);break;`
			`case 'date':`
			`if(substr($value,0,2) == 'D:') {`
			`$value = substr($value,2,14);`
			`}`
			`if(strtotime($value)){`
			`$value = date('Y-m-d H:i:s',strtotime($value));`
			`}`
			`break;`
			`}`
			`$result[$key] = $value;`
			`}`
			`// pr($result,$dataInfo);exit;`
			`return $result;`
			`}`

			`private static function decodeXref(&$fileInfo){`
			`$pdfData = $fileInfo['dataEnd'];`
			`$xref = array('trailer'=>array(),'xref'=>array());`
			`$theReg = '/[\r\n]startxref[\s][\r\n]([0-9]+)[\s]*[\r\n]+%%EOF/i';`
			`if(!preg_match_all($theReg,$pdfData, $matches,PREG_SET_ORDER,0)) return false;`

			`// 结尾block索引开始位置,比最小block小则加大;`
			`$startxref = intval($matches[0][1]);`
			`if($fileInfo['size'] - $startxref > $fileInfo['chunkSize']){`
			`$chunkSize = 4 * $fileInfo['chunkSize'];`
			`$fileInfo['chunkSize'] = $chunkSize;`
			`$fileInfo['dataStart'] = StreamWrapperIO::read($fileInfo['path'],0,$chunkSize);`
			`$fileInfo['dataEnd'] = StreamWrapperIO::read($fileInfo['path'],$fileInfo['size'] - $chunkSize,$chunkSize);`
			`$pdfData = $fileInfo['dataEnd'];`
			`}`

			`$objNum = 0;`
			`// preg_match_all('/([0-9]+)[\x20]([0-9]+)[\x20]?([nf]?)(\r\n\|[\x20]?[\r\n])/',$pdfData,$matches);`
			`preg_match_all('/([0-9]+)[\x20]([0-9]+)[\x20]?([nf]?)(\r\n\|[\x20]?[\r\n])/',$pdfData,$matches);`
			`foreach ($matches[3] as $i=>$item){`
			`if($matches[3][$i] == 'n'){`
			`$index = $objNum.'_'.intval($matches[2][$i]);`
			`$xref['xref'][$index] = intval($matches[1][$i]);`
			`++$objNum;`
			`}else if ($matches[3][$i] == 'f') {`
			`++$objNum;`
			`} else {`
			`// $objNum = intval($matches[1][$i]); //从1开始;`
			`}`
			`}`

			`// 没有索引的情况处理; 优先按照的的进行匹配;`
			`if(preg_match_all('/[\r\n]([0-9]+)[\s]+([0-9]+)[\s]+obj/iU',$pdfData, $matches)){`
			`$fileOffset = $fileInfo['size'] - $fileInfo['chunkSize'];`
			`foreach ($matches[0] as $i => $theValue) {`
			`$key = $matches[1][$i].'_'.$matches[2][$i];`
			`$xref['xref'][$key] = strpos($pdfData,$theValue) + $fileOffset + 1;`
			`}`
			`}`

			`if(preg_match_all('/trailer[\s]<<(.)>>/isU',$pdfData,$matches)){`
			`$trailerData = count($matches[1]) == 1 ? $matches[1][0] : $matches[1][1];`
			`}else{// 兼容没有trailer情况的数据; 直接从文件最后正则匹配查找;`
			`$trailerData = substr($pdfData, -1024*5);`
			`}`
			`if (preg_match('/Size[\s]+([0-9]+)/i', $trailerData, $matches) > 0) {`
			`$xref['trailer']['size'] = intval($matches[1]);`
			`}`
			`if (preg_match('/Root[\s]+([0-9]+)[\s]+([0-9]+)[\s]+R/i', $trailerData, $matches) > 0) {`
			`$xref['trailer']['root'] = intval($matches[1]).'_'.intval($matches[2]);`
			`}`
			`if (preg_match('/Encrypt[\s]+([0-9]+)[\s]+([0-9]+)[\s]+R/i', $trailerData, $matches) > 0) {`
			`$xref['trailer']['encrypt'] = intval($matches[1]).'_'.intval($matches[2]);`
			`}`
			`if (preg_match('/Info[\s]+([0-9]+)[\s]+([0-9]+)[\s]+R/i', $trailerData, $matches) > 0) {`
			`$xref['trailer']['info'] = intval($matches[1]).'_'.intval($matches[2]);`
			`}`
			`if (preg_match('/ID[\s][\[][\s][<]([^>])[>][\s][<]([^>]*)[>]/i', $trailerData, $matches) > 0) {`
			`$xref['trailer']['id'] = array();`
			`$xref['trailer']['id'][0] = $matches[1];`
			`$xref['trailer']['id'][1] = $matches[2];`
			`}`
			`if(!$xref['trailer']['info']) return false;`
			`if (preg_match('/Prev[\s]+([0-9]+)/i', $trailerData, $matches) > 0) {`
			`// $xref = self::decodeXref($pdfData,intval($matches[1]), $xref);`
			`}`
			`return $xref;`
			`}`

			`private static function getObjectValue($fileInfo,$xref,$infoKey){`
			`$dataInfoRef = self::getObject($fileInfo,$xref['xref'][$infoKey]);`
			`// pr($infoKey,$dataInfoRef);`

			`if(is_string($dataInfoRef[1])) return $dataInfoRef[1];`
			`if(!is_array($dataInfoRef[1])) return array();`
			`$dataInfo = array();`
			`for ($i = 0; $i< count($dataInfoRef[1]);$i+=2){`
			`$itemKey = $dataInfoRef[1][$i];`
			`$itemValue = $dataInfoRef[1][$i+1];`
			`if(count($itemKey) == 3 && $itemKey[0] == '/'){`
			`$value = false;`
			`if($itemValue[0] == 'objref'){`
			`$itemValue = self::getObject($fileInfo,$xref['xref'][$itemValue[1]]);`
			`}`
			`$value = $itemValue[1];`
			`if($value === false) continue;`
			`if(is_string($value)){`
			`$value = self::decodeStr($value);`
			`}`
			`$dataInfo[$itemKey[1]] = $value;`
			`}`
			`}`
			`return $dataInfo;`
			`}`
			`private static function getObject($fileInfo,$offset){`
			`$dataIndex = self::getObjectItem($fileInfo,$offset);`
			`$dataIndex = self::getObjectItem($fileInfo,$dataIndex[2]);`
			`return $dataIndex;`
			`}`
			`private static function getObjectItem($fileInfo,$offset){`
			`// return self::getRawObject($fileInfo['dataEnd'],$offset);`
			`$chunkSize = $fileInfo['chunkSize'];`
			`$fileOffset = $fileInfo['size'] - $chunkSize;`
			`$thePose = $offset >= $fileOffset ? ($offset - $fileOffset) : $offset;`
			`$theData = $offset >= $fileOffset ? $fileInfo['dataEnd']: $fileInfo['dataStart'];`
			`if($offset > $chunkSize && $offset <= $fileOffset ){`
			`$thePose = 0;`
			`$theData = StreamWrapperIO::read($fileInfo['path'],$offset,$chunkSize);`
			`// pr("getFile:$offset;$chunkSize",substr($theData,200));`
			`}`
			`$dataIndex = self::getRawObject($theData,$thePose);`

			`// 重置offset;`
			`if($offset >= $fileOffset){`
			`$dataIndex[2] = $dataIndex[2] + $fileOffset;`
			`}else if($offset > $chunkSize && $offset <= $fileOffset){`
			`$dataIndex[2] = $dataIndex[2] + $offset;`
			`}`
			`// pr('getObjectItem:',[$offset,$chunkSize,$fileOffset,$thePose],$dataIndex);`
			`return $dataIndex;`
			`}`

			`// 解析节点数据;(xxx xx obj)`
			`private static function decodeStr($text){`
			`$text = str_replace(`
			`['\\\\', '\\ ', '\\/', '\(', '\)', '\n', '\r', '\t'],`
			`['\\', ' ', '/', '(', ')', "\n", "\r", "\t"],$text);`

			`$parts = preg_split('/(\\\\\d{3})/s', $text, -1, PREG_SPLIT_NO_EMPTY \| PREG_SPLIT_DELIM_CAPTURE);`
			`$text = '';`
			`foreach ($parts as $part) {`
			`if (preg_match('/^\\\\\d{3}$/', $part)) {`
			`$text .= \chr(octdec(trim($part, '\\')));`
			`} else {`
			`$text .= $part;`
			`}`
			`}`
			`$parts = preg_split('/(#\d{2})/s', $text, -1, PREG_SPLIT_NO_EMPTY \| PREG_SPLIT_DELIM_CAPTURE);`
			`$text = '';`
			`foreach ($parts as $part) {`
			`if (preg_match('/^#\d{2}$/', $part)) {`
			`$text .= \chr(hexdec(trim($part, '#')));`
			`} else {`
			`$text .= $part;`
			`}`
			`}`

			`$parts = preg_split('/(<[a-f0-9]+>)/si', $text, -1, PREG_SPLIT_NO_EMPTY \| PREG_SPLIT_DELIM_CAPTURE);`
			`$text = '';`
			`foreach ($parts as $part) {`
			`if (preg_match('/^<.*>$/s', $part) && false === stripos($part, '<?xml')) {`
			`$part = preg_replace("/[\r\n]/", '', $part);`
			`$part = trim($part, '<>');`
			`$part = pack('H*', $part);`
			`$text .= $part;`
			`} else {`
			`$text .= $part;`
			`}`
			`}`
			`if(preg_match('/^\xFE\xFF/i', $text)) {`
			`// Strip U+FEFF byte order marker.`
			`$decode = substr($text, 2);`
			`$text = '';`
			`$length = strlen($decode);`
			`for ($i = 0; $i < $length; $i += 2) {`
			`$hex = hexdec(bin2hex(substr($decode, $i, 2)));`
			`$text .= mb_convert_encoding('&#'.intval($hex).';', 'UTF-8', 'HTML-ENTITIES');`
			`}`
			`}`
			`return $text;`
			`}`
			`private static function getRawObject($pdfData, $offset = 0){`
			`$objtype = ''; // object type to be returned`
			`$objval = ''; // object value to be returned`
			`/*`
			`* skip initial white space chars:`
			`* \x00 null (NUL)`
			`* \x09 horizontal tab (HT)`
			`* \x0A line feed (LF)`
			`* \x0C form feed (FF)`
			`* \x0D carriage return (CR)`
			`* \x20 space (SP)`
			`*/`
			`$offset += strspn($pdfData, "\x00\x09\x0a\x0c\x0d\x20", $offset);`
			`$char = $pdfData[$offset];`
			`// echo "<pre>";var_dump($char,'pos='.$offset.';len='.strlen($pdfData),substr($pdfData,$offset,33));echo "</pre>";`

			`switch ($char) {`
			`case '%': // \x25 PERCENT SIGN`
			`// skip comment and search for next token`
			`$next = strcspn($pdfData, "\r\n", $offset);`
			`if ($next > 0) {`
			`$offset += $next;`
			`return self::getRawObject($pdfData, $offset);`
			`}`
			`break;`
			`case '/': // \x2F SOLIDUS`
			`$objtype = $char;`
			`++$offset;`
			`$pregResult = preg_match(`
			`'/^([^\x00\x09\x0a\x0c\x0d\x20\s\x28\x29\x3c\x3e\x5b\x5d\x7b\x7d\x2f\x25]+)/',`
			`substr($pdfData, $offset, 256),`
			`$matches`
			`);`
			`if (1 == $pregResult) {`
			`$objval = $matches[1]; // unescaped value`
			`$offset += strlen($objval);`
			`}`
			`break;`
			`case '(': // \x28 LEFT PARENTHESIS`
			`case ')': // \x29 RIGHT PARENTHESIS`
			`// literal string object`
			`$objtype = $char;`
			`++$offset;`
			`$strpos = $offset;`
			`if ('(' == $char) {`
			`$open_bracket = 1;`
			`while ($open_bracket > 0) {`
			`if (!isset($pdfData[$strpos])) break;`
			`$ch = $pdfData[$strpos];`
			`switch ($ch) {`
			`case '\\': // REVERSE SOLIDUS (5Ch) (Backslash)`
			`// skip next character`
			`++$strpos;`
			`break;`
			`case '(': // LEFT PARENHESIS (28h)`
			`++$open_bracket;`
			`break;`
			`case ')': // RIGHT PARENTHESIS (29h)`
			`--$open_bracket;`
			`break;`
			`}`
			`++$strpos;`
			`}`
			`$objval = substr($pdfData, $offset, ($strpos - $offset - 1));`
			`$offset = $strpos;`
			`}`
			`break;`
			`case '[': // \x5B LEFT SQUARE BRACKET`
			`case ']': // \x5D RIGHT SQUARE BRACKET`
			`// array object`
			`$objtype = $char;`
			`++$offset;`
			`if ('[' == $char) {`
			`// get array content`
			`$objval = array();`
			`do {`
			`$oldOffset = $offset;`
			`// get element`
			`$element = self::getRawObject($pdfData, $offset);`
			`$offset = $element[2];`
			`$objval[] = $element;`
			`} while ((']' != $element[0]) && ($offset != $oldOffset));`
			`// remove closing delimiter`
			`array_pop($objval);`
			`}`
			`break;`
			`case '<': // \x3C LESS-THAN SIGN`
			`case '>': // \x3E GREATER-THAN SIGN`
			`if (isset($pdfData[($offset + 1)]) && ($pdfData[($offset + 1)] == $char)) {`
			`// dictionary object`
			`$objtype = $char.$char;`
			`$offset += 2;`
			`if ('<' == $char) {`
			`$objval = array();`
			`do {`
			`$oldOffset = $offset;`
			`// get element`
			`$element = self::getRawObject($pdfData, $offset);`
			`$offset = $element[2];`
			`$objval[] = $element;`
			`} while (('>>' != $element[0]) && ($offset != $oldOffset));`
			`// remove closing delimiter`
			`array_pop($objval);`
			`}`
			`} else {`
			`// hexadecimal string object`
			`$objtype = $char;`
			`++$offset;`
			`$pregResult = preg_match('/^([0-9A-Fa-f\x09\x0a\x0c\x0d\x20]+)>/iU',substr($pdfData, $offset),$matches);`
			`if (('<' == $char) && 1 == $pregResult) {`
			`// remove white space characters`
			`$objval = strtr($matches[1], "\x09\x0a\x0c\x0d\x20", '');`
			`$offset += \strlen($matches[0]);`
			`} elseif (false !== ($endpos = strpos($pdfData, '>', $offset))) {`
			`$offset = $endpos + 1;`
			`}`
			`}`
			`break;`
			`default:`
			`if ('endobj' == substr($pdfData, $offset, 6)) {`
			`// indirect object`
			`$objtype = 'endobj';`
			`$offset += 6;`
			`} elseif ('null' == substr($pdfData, $offset, 4)) {`
			`// null object`
			`$objtype = 'null';`
			`$offset += 4;`
			`$objval = 'null';`
			`} elseif ('true' == substr($pdfData, $offset, 4)) {`
			`// boolean true object`
			`$objtype = 'boolean';`
			`$offset += 4;`
			`$objval = 'true';`
			`} elseif ('false' == substr($pdfData, $offset, 5)) {`
			`// boolean false object`
			`$objtype = 'boolean';`
			`$offset += 5;`
			`$objval = 'false';`
			`} elseif ('stream' == substr($pdfData, $offset, 6)) {`
			`// start stream object`
			`$objtype = 'stream';`
			`$offset += 6;`
			`if (1 == preg_match('/^([\r]?[\n])/isU', substr($pdfData, $offset), $matches)) {`
			`$offset += strlen($matches[0]);`
			`$endStreamReg = '/(endstream)[\x09\x0a\x0c\x0d\x20]/isU';`
			`$pregResult = preg_match($endStreamReg,substr($pdfData, $offset),$matches,PREG_OFFSET_CAPTURE);`
			`if (1 == $pregResult) {`
			`$objval = substr($pdfData, $offset, $matches[0][1]);`
			`$offset += $matches[1][1];`
			`}`
			`}`
			`} elseif ('endstream' == substr($pdfData, $offset, 9)) {`
			`// end stream object`
			`$objtype = 'endstream';`
			`$offset += 9;`
			`} elseif (1 == preg_match('/^([0-9]+)[\s]+([0-9]+)[\s]+R/iU', substr($pdfData, $offset, 33), $matches)) {`
			`$objtype = 'objref';`
			`$offset += strlen($matches[0]);`
			`$objval = intval($matches[1]).'_'.intval($matches[2]);`
			`} elseif (1 == preg_match('/^([0-9]+)[\s]+([0-9]+)[\s]+obj/iU', substr($pdfData, $offset, 33), $matches)) {`
			`$objtype = 'obj';`
			`$objval = intval($matches[1]).'_'.intval($matches[2]);`
			`$offset += strlen($matches[0]);`
			`} elseif (($numlen = strspn($pdfData, '+-.0123456789', $offset)) > 0) {`
			`$objtype = 'numeric';`
			`$objval = substr($pdfData, $offset, $numlen);`
			`$offset += $numlen;`
			`}`
			`break;`
			`}`
			`return array($objtype, $objval, $offset);`
			`}`
			`}`