Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
35.50% covered (danger)
35.50%
82 / 231
70.00% covered (warning)
70.00%
7 / 10
CRAP
0.00% covered (danger)
0.00%
0 / 1
Jetpack_Media_Summary
35.50% covered (danger)
35.50%
82 / 231
70.00% covered (warning)
70.00%
7 / 10
1668.13
0.00% covered (danger)
0.00%
0 / 1
 get
26.86% covered (danger)
26.86%
47 / 175
0.00% covered (danger)
0.00%
0 / 1
1421.13
 https
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 ssl_img
100.00% covered (success)
100.00%
3 / 3
100.00% covered (success)
100.00%
1 / 1
2
 get_video_poster
0.00% covered (danger)
0.00%
0 / 8
0.00% covered (danger)
0.00%
0 / 1
30
 clean_text
100.00% covered (success)
100.00%
15 / 15
100.00% covered (success)
100.00%
1 / 1
1
 get_excerpt
40.91% covered (danger)
40.91%
9 / 22
0.00% covered (danger)
0.00%
0 / 1
7.30
 split_content_in_words
100.00% covered (success)
100.00%
2 / 2
100.00% covered (success)
100.00%
1 / 1
2
 get_word_count
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 get_word_remaining_count
100.00% covered (success)
100.00%
3 / 3
100.00% covered (success)
100.00%
1 / 1
1
 get_link_count
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
1<?php // phpcs:ignore WordPress.Files.FileName.InvalidClassFileName
2/**
3 * Provides media summary of a post.
4 *
5 * @package automattic/jetpack
6 */
7
8use Automattic\Jetpack\Image_CDN\Image_CDN_Core;
9
/**
 * Class Jetpack_Media_Summary
 *
 * Builds a summary (type, permalink, image/video URLs, excerpt and counts)
 * describing the most relevant media of a post.
 *
 * Priority: embed [video] > gallery > image > text
 */
class Jetpack_Media_Summary {

    /**
     * Media cache.
     *
     * Summaries computed during the current request, keyed by blog ID,
     * post ID and the arguments that influence the result.
     *
     * @var array
     */
    private static $cache = array();

    /**
     * Get media summary for a post.
     *
     * @param ?int  $post_id Post ID.
     * @param int   $blog_id Blog ID, if applicable.
     * @param array $args {
     *      Optional. An array of arguments.
     *      @type int $max_words Maximum number of words.
     *      @type int $max_chars Maximum number of characters.
     *      @type bool $include_excerpt Whether to compute the excerpt and return it. Default true.
     *      @type bool $include_counts  Whether to compute word/link counts. Default true.
     * }
     *
     * @return array Media summary. Always contains the keys `type`, `permalink`, `image`, `excerpt`,
     *               `word_count`, `secure` and `count`; `video`, `video_type` and `images` are added
     *               when matching media is found.
     */
    public static function get( ?int $post_id, int $blog_id = 0, array $args = array() ) {
        $post_id = (int) $post_id;

        $defaults = array(
            'max_words'       => 16,
            'max_chars'       => 256,
            'include_excerpt' => true,
            'include_counts'  => true,
        );
        $args     = wp_parse_args( $args, $defaults );

        $switched = false;
        if ( ! empty( $blog_id ) && get_current_blog_id() !== $blog_id && function_exists( 'switch_to_blog' ) ) {
            switch_to_blog( $blog_id );
            $switched = true;
        } else {
            $blog_id = get_current_blog_id();
        }

        $cache_key = "{$blog_id}_{$post_id}_{$args['max_words']}_{$args['max_chars']}_"
            . (int) $args['include_excerpt'] . '_' . (int) $args['include_counts'];
        if ( isset( self::$cache[ $cache_key ] ) ) {
            if ( $switched ) {
                restore_current_blog();
            }
            return self::$cache[ $cache_key ];
        }

        if ( ! class_exists( 'Jetpack_Media_Meta_Extractor' ) ) {
            require_once JETPACK__PLUGIN_DIR . '_inc/lib/class.media-extractor.php';
        }

        $post      = get_post( $post_id );
        $permalink = get_permalink( $post_id );

        // get_post() may return null; never dereference it blindly in the paragraph analysis below.
        $post_content = $post instanceof WP_Post ? $post->post_content : '';

        $return = array(
            'type'       => 'standard',
            'permalink'  => $permalink,
            'image'      => '',
            'excerpt'    => '',
            'word_count' => 0,
            'secure'     => array(
                'image' => '',
            ),
            'count'      => array(
                'image'          => 0,
                'video'          => 0,
                'word'           => 0,
                'word_remaining' => 0,
                'link'           => 0,
            ),
        );

        // Text-derived data is only computed for existing posts that are not password protected.
        if ( $post instanceof WP_Post && empty( $post->post_password ) ) {
            if ( $args['include_excerpt'] ) {
                $return['excerpt'] = self::get_excerpt( $post->post_content, $post->post_excerpt, $args['max_words'], $args['max_chars'], $post );
            }
            if ( $args['include_counts'] ) {
                $return['count']['word'] = self::get_word_count( $post->post_content );
                $return['count']['link'] = self::get_link_count( $post->post_content );
                // Only compute word_remaining if we have an excerpt. If not, leave the default of 0.
                if ( $args['include_excerpt'] && '' !== $return['excerpt'] ) {
                    $return['count']['word_remaining'] = self::get_word_remaining_count( $post->post_content, $return['excerpt'] );
                }
            }
        }

        $extract = Jetpack_Media_Meta_Extractor::extract( $blog_id, $post_id, Jetpack_Media_Meta_Extractor::ALL );

        // Nothing was extracted: cache and return the text-only summary (the output filter is not applied here).
        if ( empty( $extract['has'] ) ) {
            if ( $switched ) {
                restore_current_blog();
            }
            self::$cache[ $cache_key ] = $return;
            return $return;
        }

        // Prioritize [some] video embeds.
        if ( ! empty( $extract['has']['shortcode'] ) ) {
            foreach ( $extract['shortcode'] as $type => $data ) {
                // Note: a `break` inside the `if` blocks below leaves the switch, so the video is not counted.
                switch ( $type ) {
                    case 'videopress':
                    case 'wpvideo':
                        if ( 0 === $return['count']['video'] ) {
                            // If there is no id on the video, then let's just skip this.
                            if ( ! isset( $data['id'][0] ) ) {
                                break;
                            }

                            $guid = $data['id'][0];

                            // The lookup helper is defined outside this class; without it the video is counted but not described.
                            $video_info = function_exists( 'videopress_get_video_details' )
                                ? videopress_get_video_details( $guid )
                                : null;

                            // Only add the video tags if the guid returns a valid videopress object.
                            if ( $video_info instanceof stdClass ) {
                                // Skip early if we can't find a Video slug.
                                if ( empty( $video_info->files->std->mp4 ) ) {
                                    break;
                                }

                                $url = sprintf(
                                    'https://videos.files.wordpress.com/%1$s/%2$s',
                                    $guid,
                                    $video_info->files->std->mp4
                                );

                                // empty() also covers a missing `poster` property without raising a warning.
                                if ( ! empty( $video_info->poster ) ) {
                                    $return['image']           = $video_info->poster;
                                    $return['secure']['image'] = $video_info->poster;
                                }

                                $return['type']            = 'video';
                                $return['video']           = esc_url_raw( $url );
                                $return['video_type']      = 'video/mp4';
                                $return['secure']['video'] = $return['video'];
                            }
                        }
                        ++$return['count']['video'];
                        break;
                    case 'youtube':
                        if ( 0 === $return['count']['video'] ) {
                            if ( ! isset( $extract['shortcode']['youtube']['id'][0] ) ) {
                                break;
                            }
                            $return['type']            = 'video';
                            $return['video']           = esc_url_raw( 'http://www.youtube.com/watch?feature=player_embedded&v=' . $extract['shortcode']['youtube']['id'][0] );
                            $return['image']           = self::get_video_poster( 'youtube', $extract['shortcode']['youtube']['id'][0] );
                            $return['secure']['video'] = self::https( $return['video'] );
                            $return['secure']['image'] = self::https( $return['image'] );
                        }
                        ++$return['count']['video'];
                        break;
                    case 'vimeo':
                        if ( 0 === $return['count']['video'] ) {
                            if ( ! isset( $extract['shortcode']['vimeo']['id'][0] ) ) {
                                break;
                            }
                            $return['type']            = 'video';
                            $return['video']           = esc_url_raw( 'http://vimeo.com/' . $extract['shortcode']['vimeo']['id'][0] );
                            $return['secure']['video'] = self::https( $return['video'] );

                            $poster_image = get_post_meta( $post_id, 'vimeo_poster_image', true );
                            if ( ! empty( $poster_image ) ) {
                                $return['image']           = $poster_image;
                                $return['secure']['image'] = self::get_vimeo_secure_poster( $poster_image );
                            }
                        }
                        ++$return['count']['video'];
                        break;
                }
            }
        }

        if ( ! empty( $extract['has']['embed'] ) ) {
            foreach ( $extract['embed']['url'] as $embed ) {
                // Every dot is escaped so that only the literal host names match.
                if ( preg_match( '/((youtube|vimeo|dailymotion)\.com|youtu\.be)/', $embed ) ) {
                    if ( 0 === $return['count']['video'] ) {
                        $return['type']            = 'video';
                        $return['video']           = 'http://' . $embed;
                        $return['secure']['video'] = self::https( $return['video'] );
                        if ( str_contains( $embed, 'youtube' ) ) {
                            $return['image']           = self::get_video_poster( 'youtube', jetpack_get_youtube_id( $return['video'] ) );
                            $return['secure']['image'] = self::https( $return['image'] );
                        } elseif ( str_contains( $embed, 'youtu.be' ) ) {
                            // Normalize short links to the long watch URL before deriving the poster.
                            $youtube_id                = jetpack_get_youtube_id( $return['video'] );
                            $return['video']           = 'http://youtube.com/watch?v=' . $youtube_id . '&feature=youtu.be';
                            $return['secure']['video'] = self::https( $return['video'] );
                            $return['image']           = self::get_video_poster( 'youtube', jetpack_get_youtube_id( $return['video'] ) );
                            $return['secure']['image'] = self::https( $return['image'] );
                        } elseif ( str_contains( $embed, 'vimeo' ) ) {
                            $poster_image = get_post_meta( $post_id, 'vimeo_poster_image', true );
                            if ( ! empty( $poster_image ) ) {
                                $return['image']           = $poster_image;
                                $return['secure']['image'] = self::get_vimeo_secure_poster( $poster_image );
                            }
                        } elseif ( str_contains( $embed, 'dailymotion' ) ) {
                            $return['image']           = str_replace( 'dailymotion.com/video/', 'dailymotion.com/thumbnail/video/', $embed );
                            $return['image']           = wp_parse_url( $return['image'], PHP_URL_SCHEME ) === null ? 'http://' . $return['image'] : $return['image'];
                            $return['secure']['image'] = self::https( $return['image'] );
                        }
                    }
                    ++$return['count']['video'];
                }
            }
        }

        // Do we really want to make the video the primary focus of the post?
        if ( 'video' === $return['type'] ) {
            // Subtract one paragraph per counted video.
            $number_of_paragraphs = self::count_paragraphs( $post_content ) - $return['count']['video'];

            // More than 2 paragraphs? The video is not the primary focus so we can do some more analysis.
            if ( $number_of_paragraphs > 2 ) {
                $return['type'] = 'standard';
            }
        }

        // If we don't have any prioritized embed...
        if ( 'standard' === $return['type'] ) {
            if ( ( ! empty( $extract['has']['gallery'] ) || ! empty( $extract['shortcode']['gallery']['count'] ) ) && ! empty( $extract['image'] ) ) {
                // ... Then we prioritize galleries first (multiple images returned)
                $return['type']   = 'gallery';
                $return['images'] = $extract['image'];
                foreach ( $return['images'] as $image ) {
                    $return['secure']['images'][] = array( 'url' => self::ssl_img( $image['url'] ) );
                    ++$return['count']['image'];
                }
            } elseif ( ! empty( $extract['has']['image'] ) ) {
                // ... Or we try and select a single image that would make sense.
                // Paragraphs containing a [caption shortcode are not counted as text.
                $number_of_paragraphs = self::count_paragraphs( $post_content, true );

                // @phan-suppress-next-line PhanTypeMismatchDimFetch -- Phan is understandably confused, as $extract has many forms, including this one.
                if ( ! empty( $extract['image'][0]['url'] ) ) {
                    $return['image']           = $extract['image'][0]['url'];
                    $return['secure']['image'] = self::ssl_img( $return['image'] );
                    ++$return['count']['image'];
                }

                if ( $number_of_paragraphs <= 2 && is_countable( $extract['image'] ) && 1 === count( $extract['image'] ) ) {
                    // If we have lots of text or images, let's not treat it as an image post, but return its first image.
                    $return['type'] = 'image';
                }
            }
        }

        if ( $switched ) {
            restore_current_blog();
        }

        /**
         * Allow a theme or plugin to inspect and ultimately change the media summary.
         *
         * @since 4.4.0
         *
         * @param array $data The calculated media summary data.
         * @param int $post_id The id of the post this data applies to.
         */
        $return = apply_filters( 'jetpack_media_summary_output', $return, $post_id );

        self::$cache[ $cache_key ] = $return;

        return $return;
    }

    /**
     * Count the non-empty paragraphs of a piece of post content.
     *
     * Tags are stripped, the text is run through wpautop() and split on closing
     * paragraph tags; blank chunks are never counted.
     *
     * @param string $content       Raw post content.
     * @param bool   $skip_captions Whether paragraphs containing a `[caption` shortcode are ignored.
     *
     * @return int Number of paragraphs counted.
     */
    private static function count_paragraphs( $content, $skip_captions = false ) {
        $paragraphs = explode( '</p>', wpautop( wp_strip_all_tags( $content ) ) );
        $total      = 0;

        foreach ( $paragraphs as $paragraph ) {
            // Don't include 'actual' captions as a paragraph.
            if ( $skip_captions && str_contains( $paragraph, '[caption' ) ) {
                continue;
            }
            // Don't include blank lines as a paragraph.
            if ( '' === trim( $paragraph ) ) {
                continue;
            }
            ++$total;
        }

        return $total;
    }

    /**
     * Build the secure poster URL for a stored Vimeo poster image.
     *
     * The path of the stored URL is appended to the `secure-a.vimeocdn.com` host.
     * When the URL cannot be parsed or has no path, only the host is returned.
     *
     * @param string $poster_image Poster image URL stored in the `vimeo_poster_image` post meta.
     *
     * @return string Secure poster URL.
     */
    private static function get_vimeo_secure_poster( $poster_image ) {
        $path = wp_parse_url( $poster_image, PHP_URL_PATH );

        return 'https://secure-a.vimeocdn.com' . ( is_string( $path ) ? $path : '' );
    }

    /**
     * Converts http to https://
     *
     * Every occurrence of `http://` in the string is replaced, not only a leading scheme.
     *
     * @param string $str URL.
     *
     * @return string URL.
     */
    public static function https( $str ) {
        return str_replace( 'http://', 'https://', $str );
    }

    /**
     * Returns a Photonized version of the URL.
     *
     * URLs containing `files.wordpress.com` are only switched to https; any other
     * URL is passed through Image_CDN_Core::cdn_url() first.
     *
     * @param string $url URL.
     *
     * @return string URL.
     */
    public static function ssl_img( $url ) {
        if ( str_contains( $url, 'files.wordpress.com' ) ) {
            return self::https( $url );
        }

        return self::https( Image_CDN_Core::cdn_url( $url ) );
    }

    /**
     * Get the video poster.
     *
     * @param string $type Video service. Only `videopress` and `youtube` are handled.
     * @param string $id Video ID for the service.
     *
     * @return string URL of image thumbnail for the video. Empty string when the service is not
     *                handled or no VideoPress helper is available.
     */
    public static function get_video_poster( $type, $id ) {
        if ( 'videopress' === $type ) {
            if ( function_exists( 'video_get_highest_resolution_image_url' ) ) {
                return video_get_highest_resolution_image_url( $id );
            } elseif ( class_exists( 'VideoPress_Video' ) ) {
                $video = new VideoPress_Video( $id );
                return $video->poster_frame_uri;
            }
        } elseif ( 'youtube' === $type ) {
            return 'http://img.youtube.com/vi/' . $id . '/0.jpg';
        }

        // Keep the documented string return type instead of implicitly returning null.
        return '';
    }

    /**
     * Clean text of shortcodes and tags.
     *
     * Strips HTML tags, shortcodes and http(s) URLs, collapses runs of whitespace
     * into a single space and trims the result.
     *
     * @param string $text Dirty text.
     *
     * @return string Clean text.
     */
    public static function clean_text( $text ) {
        return trim(
            preg_replace(
                '/[\s]+/',
                ' ',
                preg_replace(
                    '@https?://[\S]+@',
                    '',
                    strip_shortcodes(
                        wp_strip_all_tags(
                            $text
                        )
                    )
                )
            )
        );
    }

    /**
     * Retrieve an excerpt for the post summary.
     *
     * This function works around a suspected problem with Core. If resolved, this function should be simplified.
     *
     * @link https://github.com/Automattic/jetpack/pull/8510
     * @link https://core.trac.wordpress.org/ticket/42814
     *
     * @param  string  $post_content The post's content.
     * @param  string  $post_excerpt The post's excerpt. Empty if none was explicitly set.
     * @param  int     $max_words Maximum number of words for the excerpt. Used on wp.com. Default 16.
     * @param  int     $max_chars Maximum characters in the excerpt. Used on wp.com. Default 256.
     * @param  WP_Post $requested_post The post object.
     * @return string Post excerpt. Empty string when neither the wp.com helper nor a post object is available.
     **/
    public static function get_excerpt( $post_content, $post_excerpt, $max_words = 16, $max_chars = 256, $requested_post = null ) {
        global $post;
        $original_post = $post; // Saving the global for later use.
        if ( empty( $post_excerpt ) && function_exists( 'wpcom_enhanced_excerpt_extract_excerpt' ) ) {
            return self::clean_text(
                wpcom_enhanced_excerpt_extract_excerpt(
                    array(
                        'text'                => $post_content,
                        'excerpt_only'        => true,
                        'show_read_more'      => false,
                        'max_words'           => $max_words,
                        'max_chars'           => $max_chars,
                        'read_more_threshold' => 25,
                    )
                )
            );
        } elseif ( $requested_post instanceof WP_Post ) {
            // @todo Refactor to not need to override the global.
            // phpcs:ignore: WordPress.WP.GlobalVariablesOverride.Prohibited
            $post = $requested_post; // setup_postdata does not set the global.
            setup_postdata( $post );
            /** This filter is documented in core/src/wp-includes/post-template.php */
            $post_excerpt = apply_filters( 'get_the_excerpt', $post_excerpt, $post );
            // phpcs:ignore: WordPress.WP.GlobalVariablesOverride.Prohibited
            $post = $original_post; // wp_reset_postdata uses the $post global.
            wp_reset_postdata();
            return self::clean_text( $post_excerpt );
        }
        return '';
    }

    /**
     * Split a string into an array of words.
     *
     * Words are separated by whitespace and the punctuation characters `! ? ; , .`.
     *
     * @param string $text Post content or excerpt.
     *
     * @return array Array of words.
     */
    public static function split_content_in_words( $text ) {
        $words = preg_split( '/[\s!?;,.]+/', $text, -1, PREG_SPLIT_NO_EMPTY );

        // Return an empty array if the split above fails.
        return $words ? $words : array();
    }

    /**
     * Get the word count.
     *
     * @param string $post_content Post content.
     *
     * @return int Word count.
     */
    public static function get_word_count( $post_content ) {
        return count( self::split_content_in_words( self::clean_text( $post_content ) ) );
    }

    /**
     * Get remainder word count (after the excerpt).
     *
     * The result is the plain difference of both word counts, so it is negative
     * when the excerpt holds more words than the content.
     *
     * @param string $post_content Post content.
     * @param string $excerpt_content Excerpt content.
     *
     * @return int Number of words after the excerpt.
     */
    public static function get_word_remaining_count( $post_content, $excerpt_content ) {
        $content_word_count = count( self::split_content_in_words( self::clean_text( $post_content ) ) );
        $excerpt_word_count = count( self::split_content_in_words( self::clean_text( $excerpt_content ) ) );

        return $content_word_count - $excerpt_word_count;
    }

    /**
     * Counts the number of links in a post.
     *
     * An opening anchor tag is matched case-insensitively and may be followed by
     * any whitespace character or by `>`.
     *
     * @param string $post_content Post content.
     *
     * @return int Number of links; 0 when the pattern match fails.
     */
    public static function get_link_count( $post_content ) {
        $link_count = preg_match_all( '/<a[\s>]/i', (string) $post_content );

        return false === $link_count ? 0 : $link_count;
    }
}
375                    strip_shortcodes(
376                        wp_strip_all_tags(
377                            $text
378                        )
379                    )
380                )
381            )
382        );
383    }
384
385    /**
386     * Retrieve an excerpt for the post summary.
387     *
388     * This function works around a suspected problem with Core. If resolved, this function should be simplified.
389     *
390     * @link https://github.com/Automattic/jetpack/pull/8510
391     * @link https://core.trac.wordpress.org/ticket/42814
392     *
393     * @param  string  $post_content The post's content.
394     * @param  string  $post_excerpt The post's excerpt. Empty if none was explicitly set.
395     * @param  int     $max_words Maximum number of words for the excerpt. Used on wp.com. Default 16.
396     * @param  int     $max_chars Maximum characters in the excerpt. Used on wp.com. Default 256.
397     * @param  WP_Post $requested_post The post object.
398     * @return string Post excerpt.
399     **/
400    public static function get_excerpt( $post_content, $post_excerpt, $max_words = 16, $max_chars = 256, $requested_post = null ) {
401        global $post;
402        $original_post = $post; // Saving the global for later use.
403        if ( empty( $post_excerpt ) && function_exists( 'wpcom_enhanced_excerpt_extract_excerpt' ) ) {
404            return self::clean_text(
405                wpcom_enhanced_excerpt_extract_excerpt(
406                    array(
407                        'text'                => $post_content,
408                        'excerpt_only'        => true,
409                        'show_read_more'      => false,
410                        'max_words'           => $max_words,
411                        'max_chars'           => $max_chars,
412                        'read_more_threshold' => 25,
413                    )
414                )
415            );
416        } elseif ( $requested_post instanceof WP_Post ) {
417            // @todo Refactor to not need to override the global.
418            // phpcs:ignore: WordPress.WP.GlobalVariablesOverride.Prohibited
419            $post = $requested_post; // setup_postdata does not set the global.
420            setup_postdata( $post );
421            /** This filter is documented in core/src/wp-includes/post-template.php */
422            $post_excerpt = apply_filters( 'get_the_excerpt', $post_excerpt, $post );
423            // phpcs:ignore: WordPress.WP.GlobalVariablesOverride.Prohibited
424            $post = $original_post; // wp_reset_postdata uses the $post global.
425            wp_reset_postdata();
426            return self::clean_text( $post_excerpt );
427        }
428        return '';
429    }
430
431    /**
432     * Split a string into an array of words.
433     *
434     * @param string $text Post content or excerpt.
435     *
436     * @return array Array of words.
437     */
438    public static function split_content_in_words( $text ) {
439        $words = preg_split( '/[\s!?;,.]+/', $text, -1, PREG_SPLIT_NO_EMPTY );
440
441        // Return an empty array if the split above fails.
442        return $words ? $words : array();
443    }
444
445    /**
446     * Get the word count.
447     *
448     * @param string $post_content Post content.
449     *
450     * @return int Word count.
451     */
452    public static function get_word_count( $post_content ) {
453        return count( self::split_content_in_words( self::clean_text( $post_content ) ) );
454    }
455
456    /**
457     * Get remainder word count (after the excerpt).
458     *
459     * @param string $post_content Post content.
460     * @param string $excerpt_content Excerpt content.
461     *
462     * @return int Number of words after the excerpt.
463     */
464    public static function get_word_remaining_count( $post_content, $excerpt_content ) {
465        $content_word_count = count( self::split_content_in_words( self::clean_text( $post_content ) ) );
466        $excerpt_word_count = count( self::split_content_in_words( self::clean_text( $excerpt_content ) ) );
467
468        return $content_word_count - $excerpt_word_count;
469    }
470
471    /**
472     * Counts the number of links in a post.
473     *
474     * @param string $post_content Post content.
475     *
476     * @return false|int Number of links.
477     */
478    public static function get_link_count( $post_content ) {
479        return preg_match_all( '/\<a[\> ]/', $post_content, $matches );
480    }
481}