-
-
Save getmanzooronline/61e341cb8de3d98ec12b to your computer and use it in GitHub Desktop.
| <?php | |
/**
 * Truncates text to a maximum length, optionally keeping HTML markup intact.
 *
 * If $text is longer than $length it is cut so that the result, including
 * $ending, is at most $length characters long. Text that already fits is
 * returned unchanged (without $ending).
 *
 * In HTML mode tags are copied to the output without being counted, an HTML
 * entity counts as a single character, and every element still open at the
 * cut position is closed after $ending.
 *
 * Lengths are measured with strlen()/substr(), i.e. in bytes; multi-byte
 * (e.g. UTF-8) input may therefore be cut in the middle of a character.
 *
 * @param string  $text         String to truncate.
 * @param integer $length       Length of returned string, including ellipsis.
 * @param string  $ending       Ending to be appended to the trimmed string.
 * @param boolean $exact        If false, $text will not be cut mid-word.
 * @param boolean $considerHtml If true, HTML tags are handled as described above.
 * @return string Trimmed string.
 */
function truncate($text, $length = 100, $ending = '...', $exact = true, $considerHtml = false) {
    // A named, decimal or hexadecimal entity is rendered as one character.
    $entity_pattern = '/&[0-9a-z]{2,8};|&#[0-9]{1,7};|&#x[0-9a-f]{1,6};/i';

    if ($considerHtml) {
        // If the visible text already fits, return the whole text. Entities are
        // counted as one character here as well, so this check agrees with the
        // counting done in the loop below.
        $plain_text = preg_replace('/<.*?>/s', '', $text);
        if (strlen(preg_replace($entity_pattern, ' ', $plain_text)) <= $length) {
            return $text;
        }

        // Split the input into (optional tag, following text) pairs.
        preg_match_all('/(<.+?>)?([^<>]*)/s', $text, $lines, PREG_SET_ORDER);
        $total_length = strlen($ending);
        $truncate = '';

        foreach ($lines as $line_matchings) {
            // Tags are copied through without counting towards the length.
            if (!empty($line_matchings[1])) {
                $truncate .= $line_matchings[1];
            }

            // Visible length of the text part of this pair.
            $content_length = strlen(preg_replace($entity_pattern, ' ', $line_matchings[2]));

            if ($total_length + $content_length > $length) {
                // Number of visible characters that still fit (never negative,
                // even when $ending alone is longer than $length).
                $left = max(0, $length - $total_length);
                $entities_length = 0;

                // Every entity that starts inside the allowed range uses up one
                // visible character but contributes its full byte length.
                if (preg_match_all($entity_pattern, $line_matchings[2], $entities, PREG_OFFSET_CAPTURE)) {
                    foreach ($entities[0] as $entity) {
                        if ($entity[1] + 1 - $entities_length > $left) {
                            // no more characters left
                            break;
                        }
                        $left--;
                        $entities_length += strlen($entity[0]);
                    }
                }

                $truncate .= substr($line_matchings[2], 0, $left + $entities_length);
                // maximum length is reached
                break;
            }

            $truncate .= $line_matchings[2];
            $total_length += $content_length;

            if ($total_length >= $length) {
                break;
            }
        }
    } else {
        if (strlen($text) <= $length) {
            return $text;
        }
        // Clamp to 0 so a $length shorter than $ending cannot produce a
        // negative substr() length (which would count from the end instead).
        $truncate = substr($text, 0, max(0, $length - strlen($ending)));
    }

    // If words must not be cut in the middle, go back to the last space.
    if (!$exact) {
        $spacepos = strrpos($truncate, ' ');

        if ($considerHtml) {
            // A space inside a tag (e.g. between attributes) is not a word
            // boundary: keep looking further left until the space is outside
            // of any tag. $spacepos strictly decreases, so the loop terminates.
            while ($spacepos !== false) {
                $before = substr($truncate, 0, $spacepos);
                $last_open = strrpos($before, '<');
                $last_close = strrpos($before, '>');
                $inside_tag = $last_open !== false && ($last_close === false || $last_close < $last_open);
                if (!$inside_tag) {
                    break;
                }
                $spacepos = $last_open > 0 ? strrpos(substr($truncate, 0, $last_open), ' ') : false;
            }
        }

        // strrpos() returns false when there is no space; then nothing is cut.
        if ($spacepos !== false) {
            $truncate = substr($truncate, 0, $spacepos);
        }
    }

    $closing_tags = '';
    if ($considerHtml) {
        // Work out which elements are still open in the final output. This is
        // done after the word-boundary cut so that tags removed by that cut are
        // not taken into account.
        $open_tags = array();
        preg_match_all('/<.+?>/s', $truncate, $tag_list);

        foreach ($tag_list[0] as $tag) {
            // "empty elements", with or without XHTML-style closing slash, never stay open
            if (preg_match('/^<(\s*.+?\/\s*|\s*(img|br|input|hr|area|base|basefont|col|frame|isindex|link|meta|param)(\s.+?)?)>$/is', $tag)) {
                continue;
            }

            if (preg_match('/^<\s*\/([^\s]+?)\s*>$/s', $tag, $tag_matchings)) {
                // closing tag: remove the most recently opened element of that
                // name (names are compared in lower case)
                $pos = array_search(strtolower($tag_matchings[1]), $open_tags, true);
                if ($pos !== false) {
                    unset($open_tags[$pos]);
                }
            } else if (preg_match('/^<\s*([^\s>!]+).*?>$/s', $tag, $tag_matchings)) {
                // opening tag: most recently opened elements come first
                array_unshift($open_tags, strtolower($tag_matchings[1]));
            }
        }

        // close all unclosed html-tags, innermost first
        foreach ($open_tags as $tag) {
            $closing_tags .= '</' . $tag . '>';
        }
    }

    // add the defined ending to the text, followed by any closing tags
    return $truncate . $ending . $closing_tags;
}
Thanks a lot! @getmanzooronline and @jakobtrost
Thanks! Works great!
Thanks this is the best solution I have found so far to cut a string and keep HTML tags
For UTF-8, I made the following changes
Converted strlen to mb_strlen
Converted strtolower to mb_strtolower
Converted substr to mb_substr
Converted strrpos to mb_strrpos
This version would be absolutely great if it could be used with HTML entities (like `&shy;` or `&#173;` or other entities) - I have been struggling with this for many days and couldn't get it to work with my own code - perhaps someone has an idea.
I tried your code and it works, but I found a problem: the HTML entity `&shy;` or `&#173;` (soft hyphen) is counted as one character. Because soft hyphens take up no visible space, the final text ends up "shorter" than intended. If I want a text with 120 characters I have to specify more than 170 (depending on the number of hyphens in the text) - and the next text, with fewer hyphens, will then be longer.
So my question is how to do this with your code? Would it be possible to add a blacklist of entities which shouldn't be counted?
Regards André
Thanks a lot!
Had to change the ending though:
foreach ($open_tags as $tag) { $truncate .= "</".$tag.">"; }