Server : Apache/2.4.41 (Ubuntu) System : Linux journalup 5.4.0-198-generic #218-Ubuntu SMP Fri Sep 27 20:18:53 UTC 2024 x86_64 User : www-data ( 33) PHP Version : 7.4.33 Disable Function : pcntl_alarm,pcntl_fork,pcntl_waitpid,pcntl_wait,pcntl_wifexited,pcntl_wifstopped,pcntl_wifsignaled,pcntl_wifcontinued,pcntl_wexitstatus,pcntl_wtermsig,pcntl_wstopsig,pcntl_signal,pcntl_signal_get_handler,pcntl_signal_dispatch,pcntl_get_last_error,pcntl_strerror,pcntl_sigprocmask,pcntl_sigwaitinfo,pcntl_sigtimedwait,pcntl_exec,pcntl_getpriority,pcntl_setpriority,pcntl_async_signals,pcntl_unshare, Directory : /var/www/html/lib/pkp/classes/search/ |
<?php

/**
 * @file classes/search/SearchHTMLParser.inc.php
 *
 * Copyright (c) 2014-2020 Simon Fraser University
 * Copyright (c) 2000-2020 John Willinsky
 * Distributed under the GNU GPL v3. For full terms see the file docs/COPYING.
 *
 * @class SearchHTMLParser
 * @ingroup search
 *
 * @brief Class to extract text from an HTML file.
 */

import('lib.pkp.classes.search.SearchFileParser');
import('lib.pkp.classes.core.PKPString');

class SearchHTMLParser extends SearchFileParser {
	/**
	 * Read the next chunk of the HTML file and reduce it to plain text.
	 *
	 * Reads at most 4095 bytes (or up to the next newline) from the stream
	 * held in $this->fp, removes HTML/PHP tags from it, and decodes HTML
	 * entities into UTF-8 characters.
	 *
	 * NOTE(review): $this->fp is presumably opened by the parent class
	 * before this method is called -- confirm against SearchFileParser.
	 *
	 * @return string The tag-free, entity-decoded text of the chunk; an empty
	 *  string when nothing could be read.
	 */
	public function doRead() {
		// fgetss() was deprecated in PHP 7.3 and removed in PHP 8.0, so the
		// line is read with fgets() and the tags are stripped separately.
		$rawLine = fgets($this->fp, 4096);

		// A failed read (EOF or error) used to be coerced to an empty string
		// implicitly by html_entity_decode(); keep that result, but explicitly.
		if ($rawLine === false) {
			return '';
		}

		// Strip HTML tags from the read line.
		// NOTE(review): strip_tags() keeps no state between calls, so a tag
		// that is split across two reads may leave residue in the second
		// chunk -- verify whether this matters for the search index.
		$line = strip_tags($rawLine);

		// Convert HTML entities to valid UTF-8 characters.
		$line = html_entity_decode($line, ENT_COMPAT, 'UTF-8');

		// Alternative kept from the original for reference: slightly (~10%)
		// faster than above, but not quite as accurate.
		// $line = html_entity_decode($line, ENT_COMPAT, strtoupper(Config::getVar('i18n', 'client_charset')));

		return $line;
	}
}