Preventing search engines from indexing the DEV version of the site
How do you prevent search engines from indexing a website when it is a development version (a DEV copy)?
Very often, a site has two or three versions online: DEV and PROD, and sometimes also STAGE. The DEV version (and STAGE, if there is one) should always be closed to search engine indexing, and there are various ways to do this. Often the site is protected with a password via .htpasswd, but this is not always convenient. A much more convenient way is to use the following code to block the site from being indexed:
/**
 * Closes the site from search engine indexing on dev / stage environments.
 *
 * When blocking is active (see {@see self::is_blocking_on()}) the class:
 *   - answers requests from known robot user agents with a 403 response;
 *   - adds an `X-Robots-Tag: noindex, nofollow` HTTP header;
 *   - replaces the robots.txt content with a "disallow everything" rule.
 *
 * @version 1.0
 */
final class WP_Kama_Disable_Dev_Env_Indexing {

	/**
	 * Registers the class on the `init` action. Call it once from a
	 * mu-plugin, a plugin or the theme's functions.php.
	 */
	public static function init(): void {
		add_action( 'init', [ __CLASS__, 'disable_indexing' ] );
	}

	/**
	 * Callback for the `init` action: applies all blocking measures
	 * if blocking is enabled for the current request.
	 */
	public static function disable_indexing(): void {

		if( ! self::is_blocking_on() ){
			return;
		}

		// May terminate the request with a 403 response.
		self::block_search_agents();

		add_filter( 'wp_headers', [ __CLASS__, '_HTTP_header' ] );
		add_filter( 'robots_txt', [ __CLASS__, '_robots_txt' ] );

		// Not recommended. In order to have the same behaviour as on prod
		# add_filter( 'wp_robots', [ __CLASS__, '_robots_meta_tag' ], 999 );
	}

	/**
	 * Checks whether we should disable indexing.
	 *
	 * Blocking is off for the `production` and `local` environment types
	 * and for users who can manage the site options.
	 *
	 * @return bool True when indexing must be blocked for the current request.
	 */
	private static function is_blocking_on(): bool {

		if( in_array( wp_get_environment_type(), [ 'production', 'local' ], true ) ){
			return false;
		}

		// Check a capability rather than the role name: role checks through
		// current_user_can() are discouraged by WordPress as unreliable.
		// By default only administrators have `manage_options`.
		if( current_user_can( 'manage_options' ) ){
			return false;
		}

		return true;
	}

	/**
	 * 403 response for search agents.
	 *
	 * Matches the request's User-Agent header (case-insensitively) against
	 * a list of robot name fragments and, on a match, stops the request.
	 */
	private static function block_search_agents(): void {

		$robots = 'libwww|Wget|LWP|damnBot|BBBike|spider|crawl|google|bing|yandex|msnbot';

		// The header may be missing entirely - treat it as an empty string.
		$user_agent = ( $_SERVER['HTTP_USER_AGENT'] ?? '' );

		if( preg_match( "/$robots/i", $user_agent ) ){
			http_response_code( 403 );
			die( 'Indexing of this site is Forbidden for robots.' );
		}
	}

	/**
	 * Callback for hook `wp_headers`.
	 * Adds the `X-Robots-Tag: noindex, nofollow` HTTP header.
	 *
	 * @param array $headers Headers to be sent, keyed by header name.
	 *
	 * @return array The headers with `X-Robots-Tag` set.
	 */
	public static function _HTTP_header( array $headers ): array {
		$headers['X-Robots-Tag'] = 'noindex, nofollow';

		return $headers;
	}

	/**
	 * Callback for hook `robots_txt`.
	 * Discards the incoming content and disallows everything for all agents.
	 *
	 * @return string The robots.txt content.
	 */
	public static function _robots_txt(): string {
		return "User-agent: *\nDisallow: /";
	}

	/**
	 * Callback for hook `wp_robots`.
	 * Sets the `noindex` and `nofollow` directives and removes `follow`,
	 * i.e. the result is intended to be a
	 * `<meta name='robots' content='noindex, nofollow' />` HTML meta tag.
	 *
	 * @param array $robots Robots directives, keyed by directive name.
	 *
	 * @return array The modified directives.
	 */
	public static function _robots_meta_tag( array $robots ): array {
		$robots['noindex']  = true;
		$robots['nofollow'] = true;

		// `follow` would contradict `nofollow`.
		unset( $robots['follow'] );

		return $robots;
	}

}
Now simply call the class's static init() method once, anywhere: in a mu-plugin, in a plugin, or in the theme's functions.php file:
// Hooks the class's disable_indexing() callback onto the `init` action.
WP_Kama_Disable_Dev_Env_Indexing::init();
IMPORTANT! For this code to work, you need to set the constant WP_ENVIRONMENT_TYPE in the wp-config.php file. The constant's value should be different in each environment. The code blocks indexing only when the environment type is neither `production` nor `local`.