ml ', 'html class="' . $add_classes . '" ', $match[0] ); } $content = str_replace( $match[0], $new_tag, $content ); } } return $content; }

	/**
	 * Runs the image-tag filter over a block of HTML.
	 *
	 * Returns the content untouched when image tags must be ignored for the
	 * current request, or when no image tags are found in it. Otherwise the
	 * content and the parsed matches are passed through the
	 * `optml_content_images_tags` filter.
	 *
	 * @param string $content The HTML content.
	 *
	 * @return mixed The value returned by the `optml_content_images_tags` filter,
	 *               or the original content when nothing has to be processed.
	 */
	public function process_images_from_content( $content ) {
		if ( self::should_ignore_image_tags() ) {
			return $content;
		}

		$images = self::parse_images_from_html( $content );

		if ( empty( $images ) ) {
			return $content;
		}

		return apply_filters( 'optml_content_images_tags', $content, $images );
	}

	/**
	 * Check if image tag replacement should be skipped for the current request.
	 *
	 * Replacement is skipped for feeds, for AMP endpoints (as reported by
	 * `is_amp_endpoint()` or `ampforwp_is_amp_endpoint()` when those functions
	 * exist), or when the `optml_should_ignore_image_tags` filter returns
	 * exactly `true`.
	 *
	 * IMPORTANT: This needs to be used after parse_query hook, otherwise will return false positives.
	 *
	 * @return bool True when image tags must be left untouched.
	 */
	public static function should_ignore_image_tags() {
		// Ignore image tag replacement in feed context as we don't need it.
		if ( is_feed() ) {
			return true;
		}

		// Ignore image tags replacement in amp context as they are not available.
		// Only a positive detection ends the check early: a non-AMP request must
		// still reach the other AMP integration and the filter below.
		if ( function_exists( 'is_amp_endpoint' ) && is_amp_endpoint() ) {
			return true;
		}

		if ( function_exists( 'ampforwp_is_amp_endpoint' ) && ampforwp_is_amp_endpoint() ) {
			return true;
		}

		// Let third parties opt out explicitly; only a strict boolean true counts.
		return apply_filters( 'optml_should_ignore_image_tags', false ) === true;
	}

	// NOTE(review): the patterns in the method below look truncated in this copy (tag names / markup appear to be missing) - verify against version control before relying on them.
/** * Match all images and any relevant tags in a block of HTML. * * @param string $content Some HTML. * * @return array An array of $images matches, where $images[0] is * an array of full matches, and the link_url, img_tag, * and img_url keys are arrays of those matches. */ public static function parse_images_from_html( $content ) { $images = []; $header_start = null; $header_end = null; if ( preg_match( '//ismU', $content, $matches, PREG_OFFSET_CAPTURE ) === 1 ) { $header_start = $matches[0][1]; $header_end = $header_start + strlen( $matches[0][0] ); } $regex = '/(?:]+?href=["|\'](?P[^\s]+?)["|\'][^>]*?>\s*)?(?P(?:\s*)?]*?\s?(?:' . implode( '|', array_merge( [ 'src' ], Optml_Tag_Replacer::possible_src_attributes() ) ) . ')=["\'\\\\]*?(?P[' . Optml_Config::$chars . 
']{10,}).*?>(?:\s*<\/noscript\s*>)?){1}(?:\s*<\/a>)?/ismu'; if ( preg_match_all( $regex, $content, $images, PREG_OFFSET_CAPTURE ) ) { if ( OPTML_DEBUG ) { do_action( 'optml_log', $images ); } foreach ( $images as $key => $unused ) { // Simplify the output as much as possible, mostly for confirming test results. if ( is_numeric( $key ) && $key > 0 ) { unset( $images[ $key ] ); continue; } $is_no_script = false; foreach ( $unused as $url_key => $url_value ) { if ( $key === 'img_url' ) { $images[ $key ][ $url_key ] = rtrim( $url_value[0], '\\' ); continue; } $images[ $key ][ $url_key ] = $url_value[0]; if ( $key === 0 ) { $images['in_header'][ $url_key ] = $header_start !== null ? ( $url_value[1] > $header_start && $url_value[1] < $header_end ) : false; // Check if we are in the noscript context. if ( $is_no_script === false ) { $is_no_script = strpos( $images[0][ $url_key ], 'extract_urls_from_content( $html ); if ( OPTML_DEBUG ) { do_action( 'optml_log', 'matched urls' ); do_action( 'optml_log', $extracted_urls ); } return $this->do_url_replacement( $html, $extracted_urls ); }
	// NOTE(review): the code above appears truncated in this copy (the end of one method and the start of the next seem to be missing) - verify against version control.

	/**
	 * Collects candidate asset URLs found in a block of content.
	 *
	 * Image extensions are always searched for. Asset extensions are added when
	 * the CDN setting is enabled and image tags are not being ignored for the
	 * current request. The raw matches are handed to normalize_urls().
	 *
	 * @param string $content The HTML content.
	 *
	 * @return array The result of normalize_urls() for the matched URLs.
	 */
	public function extract_urls_from_content( $content ) {
		$allowed_extensions = array_keys( Optml_Config::$image_extensions );

		// Short-circuit order matters: the ignore check only runs when the CDN is on.
		$include_assets = $this->settings->use_cdn() && ! self::should_ignore_image_tags();
		if ( $include_assets ) {
			$allowed_extensions = array_merge(
				$allowed_extensions,
				array_keys( Optml_Config::$assets_extensions )
			);
		}

		$extension_group = implode( '|', $allowed_extensions );

		// Capture group 1 holds the URL itself, without the leading delimiter.
		$pattern = '/(?:[(|\s\';",=\]])((?:http|\/|\\\\){1}(?:[' . Optml_Config::$chars . ']{10,}\.(?:' . $extension_group . ')))(?=(?:http|>|%3F|\?|"|&|,|\s|\'|\)|\||\\\\|}|\[))/Uu';

		$matches = [];
		preg_match_all( $pattern, $content, $matches );

		return $this->normalize_urls( $matches[1] );
	}

/** * Normalize extracted urls. * * @param array $urls Raw urls extracted. * * @return array Normalized array. 
*/
	private function normalize_urls( $urls ) {
		// Drop every double quote, then trim trailing backslashes, quotes and semicolons.
		$strip_noise = static function ( $candidate ) {
			return rtrim( str_replace( '"', '', $candidate ), '\\";\'' );
		};

		// De-duplicate after cleaning and hand back a sequentially indexed list.
		return array_values( array_unique( array_map( $strip_noise, $urls ) ) );
	}

/** * Process string content and replace possible urls. * * @param string $html String content. * @param array $extracted_urls Urls to check. * * @return string Processed html. */ public function do_url_replacement( $html, $extracted_urls ) { $extracted_urls = apply_filters( 'optml_extracted_urls', $extracted_urls ); if ( empty( $extracted_urls ) ) { return $html; } $slashed_config = addcslashes( Optml_Config::$service_url, '/' ); $extracted_urls = array_filter( $extracted_urls, function ( $value ) use ( $slashed_config ) { return strpos( $value, Optml_Config::$service_url ) === false && strpos( $value, $slashed_config ) === false || Optml_Media_Offload::is_not_processed_image( $value ) || $this->tag_replacer->url_has_dam_flag( $value ); } ); $upload_resource = $this->tag_replacer->get_upload_resource(); $urls = array_combine( $extracted_urls, $extracted_urls ); $urls = array_map( function ( $url ) use ( $upload_resource ) { $is_slashed = strpos( $url, '\/' ) !== false; $is_relative = strpos( $url, $is_slashed ? addcslashes( $upload_resource['content_path'], '/' ) : $upload_resource['content_path'] ) === 0; if ( $is_relative ) { $url = $upload_resource['content_host'] . $url; } return apply_filters( 'optml_content_url', $url ); }, $urls ); foreach ( $urls as $origin => $replace ) { $html = preg_replace( '/(?