<a href="http://example.com/">link text</a>
* Version: 1.0
* Author: Janis Elsts
* ModuleID: link
* ModuleCategory: parser
* ModuleClassName: blcHTMLLink
* ModuleContext: on-demand
* ModuleLazyInit: true
* ModulePriority: 1000
*/
class blcHTMLLink extends blcParser {
var $supported_formats = array( 'html' );
/**
 * Parse a string for HTML links and their anchor text.
 *
 * The content is first run through the 'blc-parser-html-link-content' filter. Any
 * <code>...</code> blocks are then blanked out so that example markup inside them
 * is not picked up as a link.
 *
 * @param string $content The text to parse.
 * @param string $base_url The base URL to use for normalizing relative URLs. It is handed to the parser callback, which forwards it to relative2absolute(). The original documentation states that the blog's root URL is used if it is omitted.
 * @param string $default_link_text Handed to the parser callback together with $base_url.
 * @return array An array of new blcLinkInstance objects. The objects will include info about the links found, but not about the corresponding container entity. Skipped links are filtered out with array_filter(), so the array keys may have gaps.
 */
function parse( $content, $base_url = '', $default_link_text = '' ) {
	$content = apply_filters( 'blc-parser-html-link-content', $content );

	//Remove all <code></code> blocks first.
	$without_code = preg_replace( '/<code[^>]*>.+?<\/code>/si', ' ', $content );
	if ( null !== $without_code ) {
		//preg_replace() returns null on a PCRE error; keep the unstripped content in that case.
		$content = $without_code;
	}

	//Find links
	$params = array(
		'base_url'          => $base_url,
		'default_link_text' => $default_link_text,
	);
	$instances = $this->map( $content, array( $this, 'parser_callback' ), $params );

	//The parser callback returns NULL when it finds an invalid link. Filter out those nulls
	//from the list of instances.
	$instances = array_filter( $instances );

	return $instances;
}
/**
 * blcHTMLLink::parser_callback()
 *
 * Converts one link found by map() into a link instance, or rejects it.
 *
 * @access private
 *
 * @param array $link Link data. The 'href' and '#link_text' entries are read.
 * @param array $params Extra arguments from parse(). Only 'base_url' is read here.
 * @return blcLinkInstance|null Null when the link is skipped (empty URL, parse_url() failure, or a URL shorter than 6 characters).
 */
function parser_callback( $link, $params ) {
	global $blclog;

	$log_prefix = __CLASS__ . ':' . __FUNCTION__;
	$base_url   = $params['base_url'];
	$raw_url    = $link['href'];

	//Trim the URL, then execute any shortcodes it may contain.
	$url = do_shortcode( trim( $raw_url ) );

	//Nothing to check if the URL is empty.
	if ( empty( $url ) ) {
		$blclog->warn( $log_prefix . ' Skipping the link (empty URL)' );
		return null;
	}

	//URLs that parse_url() can not handle are skipped as well.
	$parts = @parse_url( $url );
	if ( ! $parts ) {
		$blclog->warn( $log_prefix . ' Skipping the link (parse_url failed)', $url );
		return null;
	}

	//A missing scheme most likely means a relative URL, so make it absolute.
	//TODO: Also log the original URL and base URL.
	if ( ! isset( $parts['scheme'] ) ) {
		$url = $this->relative2absolute( $url, $base_url );
		$blclog->info( $log_prefix . ' Convert relative URL to absolute. Absolute URL = "' . $url . '"' );
	}

	//Re-validate: the conversion above may have produced an unusable result.
	if ( ! $url || ( strlen( $url ) < 6 ) ) {
		$blclog->info( $log_prefix . ' Skipping the link (invalid/short URL)', $url );
		return null;
	}

	//Strip left-to-right marks. See: https://en.wikipedia.org/wiki/Left-to-right_mark
	$url = str_replace( json_decode( '"\u200E"' ), '', $url );

	$anchor_text = $link['#link_text'];

	//The URL passed all checks; build and populate the instance.
	$instance = new blcLinkInstance();
	$instance->set_parser( $this );
	$instance->raw_url   = $raw_url;
	$instance->link_text = $anchor_text;

	//Constructing the blcLink creates or loads the link record.
	$instance->set_link( new blcLink( $url ) );

	return $instance;
}
/**
 * Change all links that have a certain URL to a new URL.
 *
 * @param string $content Look for links in this string.
 * @param string $new_url Change the links to this URL.
 * @param string $old_url The URL to look for.
 * @param string $old_raw_url The raw, not-normalized URL of the links to look for. May be empty, in which case $old_url is matched instead.
 * @param string $new_text New link text. Optional.
 *
 * @return array|WP_Error This implementation always returns an associative array with the keys
 * 'content' (the modified content) and 'raw_url' (the new raw URL, which is $new_url). A
 * 'link_text' key holding $new_text is added when $new_text is not null.
 */
function edit( $content, $new_url, $old_url, $old_raw_url, $new_text = null ) {
	//Without a raw URL, fall back to matching against the normalized one.
	$url_to_match = empty( $old_raw_url ) ? $old_url : $old_raw_url;

	//Walk over every link and rewrite the ones that match.
	$updated_content = $this->multi_edit(
		$content,
		array( &$this, 'edit_callback' ),
		array(
			'old_url'  => $url_to_match,
			'new_url'  => $new_url,
			'new_text' => $new_text,
		)
	);

	$result = array(
		'content' => $updated_content,
		'raw_url' => $new_url,
	);

	//Only report the link text when the caller actually supplied one.
	if ( isset( $new_text ) ) {
		$result['link_text'] = $new_text;
	}

	return $result;
}
/**
 * Callback used by edit(): decide how a single link should be rewritten.
 *
 * @access private
 *
 * @param array $link Link data. The 'href' and '#raw' entries are read.
 * @param array $params Must contain 'old_url' and 'new_url'; 'new_text' is applied when set and not null.
 * @return array|string The fields to change for a matching link, or the link's unmodified '#raw' markup when the URL does not match.
 */
function edit_callback( $link, $params ) {
	//Links that point somewhere else are passed through untouched.
	if ( $link['href'] !== $params['old_url'] ) {
		return $link['#raw'];
	}

	$changes = array(
		'href' => $params['new_url'],
	);

	if ( isset( $params['new_text'] ) ) {
		$changes['#link_text'] = $params['new_text'];
	}

	return $changes;
}
/**
 * Report whether the anchor text of links handled by this parser can be edited.
 *
 * @return bool Always true.
 */
public function is_link_text_editable() {
return true;
}
/**
 * Report whether the URL of links handled by this parser can be edited.
 *
 * @return bool Always true.
 */
public function is_url_editable() {
return true;
}
/**
 * Remove all links that have a certain URL, leaving anchor text intact.
 *
 * @param string $content Look for links in this string.
 * @param string $url The URL to look for.
 * @param string $raw_url The raw, non-normalized version of the URL to look for. May be empty, in which case $url is matched instead.
 * @return string Input string with all matching links removed.
 */
function unlink( $content, $url, $raw_url ) {
	//Without a raw URL, fall back to matching against the normalized one.
	$url_to_match = empty( $raw_url ) ? $url : $raw_url;

	//Walk over every link and strip the ones that match.
	return $this->multi_edit(
		$content,
		array( &$this, 'unlink_callback' ),
		array(
			'old_url' => $url_to_match,
		)
	);
}
/**
 * blcHTMLLink::unlink_callback()
 *
 * Callback used by unlink(): produce the replacement markup for a single link.
 *
 * @access private
 *
 * @param array $link Link data. The 'href', '#raw' and '#link_text' entries are read.
 * @param array $params Must contain 'old_url', the raw URL of the links to remove.
 * @return string The unmodified '#raw' markup for non-matching links. For matching links, either
 * the bare anchor text or, when the 'mark_removed_links' option is enabled, the anchor text
 * wrapped in a span with the removed_link CSS class.
 */
function unlink_callback( $link, $params ) {
	//Skip links that don't match the specified URL
	if ( $link['href'] !== $params['old_url'] ) {
		return $link['#raw'];
	}

	$config = blc_get_configuration();

	//empty() also covers the option being absent, which would otherwise raise a notice.
	if ( ! empty( $config->options['mark_removed_links'] ) ) {
		//Leave only the anchor text + the removed_link CSS class.
		//The old URL is kept in the title attribute.
		return sprintf(
			'<span class="removed_link" title="%s">%s</span>',
			esc_attr( $link['href'] ),
			$link['#link_text']
		);
	}

	//Just the anchor text
	return $link['#link_text'];
}
/**
 * Get the link text for printing in the "Broken Links" table.
 *
 * For HTML links this is the instance's anchor text with all tags stripped.
 *
 * @param blcLinkInstance $instance The instance whose link_text property is displayed.
 * @param string $context Not used by this implementation.
 * @return string The anchor text without HTML tags.
 */
function ui_get_link_text( $instance, $context = 'display' ) {
	$anchor_text = $instance->link_text;

	return strip_tags( $anchor_text );
}
/**
 * Apply a callback function to all HTML links found in a string and return the results.
 *
 * The link data array passed to the callback contains at least these keys :
 *  'href' - the URL of the link, or an empty string if the tag has no href attribute.
 *  '#raw' - the raw link code, e.g. the entire '<a href="...">...</a>' tag of a HTML link.
 *  '#offset' - the offset within $content at which the first character of the link tag was found.
 *  '#link_text' - the link's anchor text, if any. May contain HTML tags.
 *
 * All attributes reported for the link tag are also included as attr_name => attr_value pairs.
 * According to the original documentation, HTML entities in attribute values have already been
 * decoded by the tag extractor (blcUtility::extract_tags()).
 *
 * @see blcParser::map()
 *
 * @param string $content A text string to parse for links.
 * @param callback $callback Callback function to apply to all found links.
 * @param mixed $extra If the optional $extra param. is supplied (not null), it will be passed as the second parameter to the function $callback.
 * @return array The return values of $callback, one per detected link, in the order the links were reported.
 */
function map( $content, $callback, $extra = null ) {
	//Locate every <a> tag in the content.
	$tags = blcUtility::extract_tags( $content, 'a', false, true );

	$pass_extra = isset( $extra );
	$results    = array();

	foreach ( $tags as $tag ) {
		$attributes = $tag['attributes'];

		//Tags without a href attribute still get an (empty) 'href' entry.
		$href = '';
		if ( isset( $attributes['href'] ) ) {
			$href = $attributes['href'];
		}

		//Combine the tag attributes with the special '#' entries the callbacks rely on.
		$link_data = array_merge(
			$attributes,
			array(
				'#raw'       => $tag['full_tag'],
				'#offset'    => $tag['offset'],
				'#link_text' => $tag['contents'],
				'href'       => $href,
			)
		);

		//The extra argument is only forwarded when one was given.
		$arguments = $pass_extra ? array( $link_data, $extra ) : array( $link_data );

		$results[] = call_user_func_array( $callback, $arguments );
	}

	return $results;
}
/**
 * Modify all HTML links found in a string using a callback function.
 *
 * The callback function should return either an associative array or a string. If
 * a string is returned, the parser will replace the current link with the contents
 * of that string. If an array is returned, the current link will be modified/rebuilt
 * by substituting the new values for the old ones.
 *
 * esc_attr() will be automatically applied to attribute values (but not to #link_text).
 *
 * @see blcParser::multi_edit()
 *
 * @param string $content A text string containing the links to edit.
 * @param callback $callback Callback function used to modify the links.
 * @param mixed $extra If supplied, $extra will be passed as the second parameter to the function $callback.
 * @return string The modified input string.
 */
function multi_edit( $content, $callback, $extra = null ) {
	//Just reuse map() + a little helper func. to apply the callback to all links and get modified links
	$modified_links = $this->map( $content, array( &$this, 'execute_edit_callback' ), array( $callback, $extra ) );

	//Replace each old link with the modified one.
	//$offset tracks how far earlier replacements have shifted the remaining content.
	$offset = 0;
	foreach ( $modified_links as $link ) {
		if ( isset( $link['#new_raw'] ) ) {
			//The callback supplied ready-made replacement markup.
			$new_html = $link['#new_raw'];
		} else {
			//Assemble the new link tag
			$new_html = '<a';
			foreach ( $link as $name => $value ) {
				//Skip special keys like '#raw' and '#offset'
				if ( substr( $name, 0, 1 ) === '#' ) {
					continue;
				}

				$new_html .= sprintf( ' %s="%s"', $name, esc_attr( $value ) );
			}
			$new_html .= '>' . $link['#link_text'] . '</a>';
		}

		$old_length = strlen( $link['#raw'] );
		$content    = substr_replace( $content, $new_html, $link['#offset'] + $offset, $old_length );

		//Update the replacement offset
		$offset += ( strlen( $new_html ) - $old_length );
	}

	return $content;
}
/**
 * Helper function for blcHtmlLink::multi_edit()
 *
 * Runs the edit callback for one link. An array result is merged over the current
 * link data; a string result is treated as replacement markup and stored under the
 * '#new_raw' key. Any other result leaves the link data unchanged.
 *
 * @access protected
 *
 * @param array $link The link data to hand to the callback.
 * @param array $info A two-element list: the callback function, then the extra argument to pass to it (null for none).
 * @return array The link data, updated as described above.
 */
function execute_edit_callback( $link, $info ) {
	$callback = $info[0];
	$extra    = $info[1];

	//The extra argument is only forwarded when one was given.
	$arguments = isset( $extra ) ? array( $link, $extra ) : array( $link );

	$outcome = call_user_func_array( $callback, $arguments );

	if ( is_array( $outcome ) ) {
		return array_merge( $link, $outcome );
	}

	if ( is_string( $outcome ) ) {
		$link['#new_raw'] = $outcome;
	}

	return $link;
}
}