<?php
/**
 * Plugin Name: HTML links
 * Description: Example : <code>&lt;a href="http://example.com/"&gt;link text&lt;/a&gt;</code>
 * Version: 1.0
 * Author: Janis Elsts
 *
 * ModuleID: link
 * ModuleCategory: parser
 * ModuleClassName: blcHTMLLink
 * ModuleContext: on-demand
 * ModuleLazyInit: true
 * ModulePriority: 1000
 */

// NOTE(review): the opening PHP tag and the first two header lines above were
// reconstructed because they were missing from the reviewed copy of this file.
// Confirm the module name and description text against the module list.

/**
 * Parser module that finds, edits and removes plain HTML links (<a> tags).
 */
class blcHTMLLink extends blcParser {

	/**
	 * Container formats this parser declares support for.
	 *
	 * @var array
	 */
	public $supported_formats = array( 'html' );

	/**
	 * Parse a string for HTML links - <a href="URL">anchor text</a>
	 *
	 * The content is first passed through the 'blc-parser-html-link-content'
	 * filter, then all <code>...</code> blocks are blanked out so that example
	 * markup is not reported as real links.
	 *
	 * @param string $content           The text to parse.
	 * @param string $base_url          The base URL to use for normalizing relative URLs. If omitted, the blog's root URL will be used.
	 * @param string $default_link_text Passed through to the parser callback; this parser's callback does not read it.
	 * @return array An array of new blcLinkInstance objects. The objects will include info about the links found, but not about the corresponding container entity.
	 */
	public function parse( $content, $base_url = '', $default_link_text = '' ) {
		$content = apply_filters( 'blc-parser-html-link-content', $content );

		//Remove all <code></code> blocks first.
		//preg_replace() returns NULL on a PCRE error; in that case keep the
		//unfiltered content instead of handing NULL to the link extractor.
		$without_code = preg_replace( '/<code[^>]*>.+?<\/code>/si', ' ', $content );
		if ( null !== $without_code ) {
			$content = $without_code;
		}

		//Find links
		$params = array(
			'base_url'          => $base_url,
			'default_link_text' => $default_link_text,
		);

		$instances = $this->map( $content, array( $this, 'parser_callback' ), $params );

		//The parser callback returns NULL when it finds an invalid link. Filter out those nulls
		//from the list of instances.
		$instances = array_filter( $instances );

		return $instances;
	}

	/**
	 * blcHTMLLink::parser_callback()
	 *
	 * Turns one link found by map() into a blcLinkInstance, or returns null
	 * when the URL is empty, unparseable or too short to be valid.
	 *
	 * @access private
	 *
	 * @param array $link   Link data as built by map() ('href', '#link_text', ...).
	 * @param array $params Extra data from parse(); only 'base_url' is read here.
	 * @return blcLinkInstance|null
	 */
	public function parser_callback( $link, $params ) {
		global $blclog;

		$base_url = $params['base_url'];

		$raw_url = $link['href'];
		$url     = trim( $raw_url );
		//$blclog->debug(__CLASS__ .':' . __FUNCTION__ . ' Found a link, raw URL = "' . $raw_url . '"');

		//Sometimes links may contain shortcodes. Execute them.
		$url = do_shortcode( $url );

		//Skip empty URLs
		if ( empty( $url ) ) {
			$blclog->warn( __CLASS__ . ':' . __FUNCTION__ . ' Skipping the link (empty URL)' );
			return null;
		}

		//Attempt to parse the URL
		$parts = @parse_url( $url );
		if ( ! $parts ) {
			$blclog->warn( __CLASS__ . ':' . __FUNCTION__ . ' Skipping the link (parse_url failed)', $url );
			return null; //Skip invalid URLs
		}

		if ( ! isset( $parts['scheme'] ) ) {
			//No scheme - likely a relative URL. Turn it into an absolute one.
			//TODO: Also log the original URL and base URL.
			$url = $this->relative2absolute( $url, $base_url ); //$base_url comes from $params
			$blclog->info( __CLASS__ . ':' . __FUNCTION__ . ' Convert relative URL to absolute. Absolute URL = "' . $url . '"' );
		}

		//Skip invalid links (again)
		if ( ! $url || ( strlen( $url ) < 6 ) ) {
			$blclog->info( __CLASS__ . ':' . __FUNCTION__ . ' Skipping the link (invalid/short URL)', $url );
			return null;
		}

		//Remove left-to-right marks. See: https://en.wikipedia.org/wiki/Left-to-right_mark
		//"\xE2\x80\x8E" is the UTF-8 encoding of U+200E.
		$url = str_replace( "\xE2\x80\x8E", '', $url );

		$text = $link['#link_text'];

		//The URL is okay, create and populate a new link instance.
		$instance = new blcLinkInstance();

		$instance->set_parser( $this );
		$instance->raw_url   = $raw_url;
		$instance->link_text = $text;

		$link_obj = new blcLink( $url ); //Creates or loads the link
		$instance->set_link( $link_obj );

		return $instance;
	}

	/**
	 * Change all links that have a certain URL to a new URL.
	 *
	 * @param string $content     Look for links in this string.
	 * @param string $new_url     Change the links to this URL.
	 * @param string $old_url     The URL to look for.
	 * @param string $old_raw_url The raw, not-normalized URL of the links to look for. Optional.
	 * @param string $new_text    New link text. Optional.
	 *
	 * @return array|WP_Error If successful, the return value will be an associative array with two
	 * keys : 'content' - the modified content, and 'raw_url' - the new raw, non-normalized URL used
	 * for the modified links. In most cases, the returned raw_url will be equal to the new_url.
	 * When $new_text is given, the array also has a 'link_text' key holding it.
	 */
	public function edit( $content, $new_url, $old_url, $old_raw_url, $new_text = null ) {
		if ( empty( $old_raw_url ) ) {
			$old_raw_url = $old_url;
		}

		//Save the old & new URLs for use in the edit callback.
		$args = array(
			'old_url'  => $old_raw_url,
			'new_url'  => $new_url,
			'new_text' => $new_text,
		);

		//Find all links and replace those that match $old_url.
		$content = $this->multi_edit( $content, array( $this, 'edit_callback' ), $args );

		$result = array(
			'content' => $content,
			'raw_url' => $new_url,
		);
		if ( isset( $new_text ) ) {
			$result['link_text'] = $new_text;
		}

		return $result;
	}

	/**
	 * Callback for edit(): rewrite a link whose href matches the old URL exactly.
	 *
	 * @param array $link   Link data as built by map().
	 * @param array $params 'old_url', 'new_url' and (optionally non-null) 'new_text'.
	 * @return array|string The attributes to override for a matching link, or the
	 *                      unchanged raw tag for a link that does not match.
	 */
	public function edit_callback( $link, $params ) {
		if ( $link['href'] !== $params['old_url'] ) {
			return $link['#raw'];
		}

		$modified = array(
			'href' => $params['new_url'],
		);
		if ( isset( $params['new_text'] ) ) {
			$modified['#link_text'] = $params['new_text'];
		}

		return $modified;
	}

	/**
	 * Reports that the anchor text of links handled by this parser can be edited.
	 *
	 * @return bool Always true.
	 */
	public function is_link_text_editable() {
		return true;
	}

	/**
	 * Reports that the URL of links handled by this parser can be edited.
	 *
	 * @return bool Always true.
	 */
	public function is_url_editable() {
		return true;
	}

	/**
	 * Remove all links that have a certain URL, leaving anchor text intact.
	 *
	 * @param string $content Look for links in this string.
	 * @param string $url     The URL to look for.
	 * @param string $raw_url The raw, non-normalized version of the URL to look for. Optional.
	 * @return string Input string with all matching links removed.
	 */
	public function unlink( $content, $url, $raw_url ) {
		if ( empty( $raw_url ) ) {
			$raw_url = $url;
		}

		$args = array(
			'old_url' => $raw_url,
		);

		//Find all links and remove those that match $raw_url.
		$content = $this->multi_edit( $content, array( $this, 'unlink_callback' ), $args );

		return $content;
	}

	/**
	 * blcHTMLLink::unlink_callback()
	 *
	 * @access private
	 *
	 * @param array $link   Link data as built by map().
	 * @param array $params Only 'old_url' is read.
	 * @return string Replacement HTML for the link.
	 */
	public function unlink_callback( $link, $params ) {
		//Skip links that don't match the specified URL
		if ( $link['href'] !== $params['old_url'] ) {
			return $link['#raw'];
		}

		$config = blc_get_configuration();
		if ( ! empty( $config->options['mark_removed_links'] ) ) {
			//Leave only the anchor text + the removed_link CSS class
			return sprintf(
				'<span class="removed_link" title="%s">%s</span>',
				esc_attr( $link['href'] ),
				$link['#link_text']
			);
		}

		//Just the anchor text
		return $link['#link_text'];
	}

	/**
	 * Get the link text for printing in the "Broken Links" table.
	 * Sub-classes should override this method and display the link text in a way appropriate for the link type.
	 *
	 * @param blcLinkInstance $instance
	 * @param string          $context Not used by this implementation.
	 * @return string The instance's link text with HTML tags stripped.
	 */
	public function ui_get_link_text( $instance, $context = 'display' ) {
		//The cast avoids passing NULL to strip_tags() when no link text is set.
		return strip_tags( (string) $instance->link_text );
	}

	/**
	 * Apply a callback function to all HTML links found in a string and return the results.
	 *
	 * The link data array will contain at least these keys :
	 *  'href'       - the URL of the link, or '' if the tag has no href attribute.
	 *  '#raw'       - the raw link code, e.g. the entire '<a href="...">...</a>' tag of a HTML link.
	 *  '#offset'    - the offset within $content at which the first character of the link tag was found.
	 *  '#link_text' - the link's anchor text, if any. May contain HTML tags.
	 *
	 * Any attributes of the link tag will also be included in the returned array as attr_name => attr_value
	 * pairs. The values are used exactly as returned by blcUtility::extract_tags(); the original
	 * documentation states that HTML entities in them have already been decoded.
	 *
	 * @see blcParser::map()
	 *
	 * @param string   $content  A text string to parse for links.
	 * @param callback $callback Callback function to apply to all found links.
	 * @param mixed    $extra    If the optional $extra param. is supplied, it will be passed as the second parameter to the function $callback.
	 * @return array An array of all detected links after applying $callback to each of them.
	 */
	public function map( $content, $callback, $extra = null ) {
		$results = array();

		//Find all links
		$links = blcUtility::extract_tags( $content, 'a', false, true );

		//Iterate over the links and apply $callback to each
		foreach ( $links as $link ) {
			//Massage the found link into a form required for the callback function.
			//The special keys are merged last so they win over same-named attributes.
			$param = array_merge(
				$link['attributes'],
				array(
					'#raw'       => $link['full_tag'],
					'#offset'    => $link['offset'],
					'#link_text' => $link['contents'],
					'href'       => isset( $link['attributes']['href'] ) ? $link['attributes']['href'] : '',
				)
			);

			//Prepare arguments for the callback
			$params = array( $param );
			if ( isset( $extra ) ) {
				$params[] = $extra;
			}

			//Execute & store :)
			$results[] = call_user_func_array( $callback, $params );
		}

		return $results;
	}

	/**
	 * Modify all HTML links found in a string using a callback function.
	 *
	 * The callback function should return either an associative array or a string. If
	 * a string is returned, the parser will replace the current link with the contents
	 * of that string. If an array is returned, the current link will be modified/rebuilt
	 * by substituting the new values for the old ones.
	 *
	 * esc_attr() will be automatically applied to attribute values (but not to #link_text).
	 *
	 * @see blcParser::multi_edit()
	 *
	 * @param string   $content  A text string containing the links to edit.
	 * @param callback $callback Callback function used to modify the links.
	 * @param mixed    $extra    If supplied, $extra will be passed as the second parameter to the function $callback.
	 * @return string The modified input string.
	 */
	public function multi_edit( $content, $callback, $extra = null ) {
		//Just reuse map() + a little helper func. to apply the callback to all links and get modified links
		$modified_links = $this->map(
			$content,
			array( $this, 'execute_edit_callback' ),
			array( $callback, $extra )
		);

		//Replace each old link with the modified one.
		//$offset tracks how far earlier replacements have shifted the remaining text.
		$offset = 0;
		foreach ( $modified_links as $link ) {
			if ( isset( $link['#new_raw'] ) ) {
				$new_html = $link['#new_raw'];
			} else {
				//Assemble the new link tag
				$new_html = '<a';
				foreach ( $link as $name => $value ) {
					//Skip special keys like '#raw' and '#offset'
					if ( substr( $name, 0, 1 ) === '#' ) {
						continue;
					}

					$new_html .= sprintf( ' %s="%s"', $name, esc_attr( $value ) );
				}
				$new_html .= '>' . $link['#link_text'] . '</a>';
			}

			$content = substr_replace( $content, $new_html, $link['#offset'] + $offset, strlen( $link['#raw'] ) );

			//Update the replacement offset
			$offset += ( strlen( $new_html ) - strlen( $link['#raw'] ) );
		}

		return $content;
	}

	/**
	 * Helper function for blcHtmlLink::multi_edit()
	 * Applies the specified callback function to each link and merges
	 * the result with the current link attributes. If the callback returns
	 * a replacement HTML tag instead, it will be stored in the '#new_raw'
	 * key of the return array.
	 *
	 * @access protected
	 *
	 * @param array $link
	 * @param array $info The callback function and the extra argument to pass to that function (if any).
	 * @return array
	 */
	public function execute_edit_callback( $link, $info ) {
		list( $callback, $extra ) = $info;

		//Prepare arguments for the callback
		$params = array( $link );
		if ( isset( $extra ) ) {
			$params[] = $extra;
		}

		$new_link = call_user_func_array( $callback, $params );

		if ( is_array( $new_link ) ) {
			$link = array_merge( $link, $new_link );
		} elseif ( is_string( $new_link ) ) {
			$link['#new_raw'] = $new_link;
		}

		return $link;
	}
}