Preventing search engines from indexing the DEV version of the site
How do you prevent search engines from indexing a website when it is a development version (a DEV copy)?
Very often, a site has two or three versions online: DEV and PROD (and sometimes STAGE). The DEV version should always be closed to indexing, and there are several ways to do this. A common one is to password-protect it with .htpasswd, but that is not always convenient. A much more convenient way is to block indexing with the following code:
/**
 * Closes the site from search engine indexing on dev/stage environments.
 *
 * Blocking is active for every environment type except `production` and
 * `local`, and is skipped for users who have the `manage_options` capability.
 * When blocking is active:
 *  - requests whose User-Agent looks like a robot receive a 403 response;
 *  - the `X-Robots-Tag: noindex, nofollow` header is added via `wp_headers`;
 *  - the robots.txt output is replaced with a "disallow everything" rule.
 *
 * @version 1.0
 */
final class WP_Kama_Disable_Dev_Env_Indexing {

	/**
	 * Entry point: defers the actual work to the `init` hook.
	 */
	public static function init(): void {
		add_action( 'init', [ __CLASS__, 'disable_indexing' ] );
	}

	/**
	 * Callback for hook `init`.
	 * Applies all indexing restrictions when blocking is enabled for the current request.
	 */
	public static function disable_indexing(): void {

		if ( ! self::is_blocking_on() ) {
			return;
		}

		// May terminate the request with a 403, so it runs before the filters are registered.
		self::block_search_agents();

		add_filter( 'wp_headers', [ __CLASS__, '_HTTP_header' ] );
		add_filter( 'robots_txt', [ __CLASS__, '_robots_txt' ] );

		// Optional and not recommended: the meta tag is left off in order to have
		// the same behaviour as on prod. Uncomment the line below to enable it.
		// add_filter( 'wp_robots', [ __CLASS__, '_robots_meta_tag' ], 999 );
	}

	/**
	 * Checks whether we should disable indexing.
	 *
	 * @return bool False on `production` and `local` environments and for users
	 *              with the `manage_options` capability; true otherwise.
	 */
	private static function is_blocking_on(): bool {

		if ( in_array( wp_get_environment_type(), [ 'production', 'local' ], true ) ) {
			return false;
		}

		// Check a capability, not the role name: passing a role to current_user_can()
		// is discouraged by WordPress because it may produce unreliable results.
		if ( current_user_can( 'manage_options' ) ) {
			return false;
		}

		return true;
	}

	/**
	 * 403 response for search agents.
	 *
	 * Matches the request User-Agent (case-insensitively) against a list of known
	 * robot signatures and terminates the request when one of them is found.
	 */
	private static function block_search_agents(): void {

		$robots = 'libwww|Wget|LWP|damnBot|BBBike|spider|crawl|google|bing|yandex|msnbot';

		// The header may be absent; an empty string never matches.
		$user_agent = ( $_SERVER['HTTP_USER_AGENT'] ?? '' );

		if ( preg_match( "/$robots/i", $user_agent ) ) {
			http_response_code( 403 );
			die( 'Indexing of this site is Forbidden for robots.' );
		}
	}

	/**
	 * Callback for hook `wp_headers`.
	 * Adds (or overwrites) the `X-Robots-Tag: noindex, nofollow` HTTP header.
	 *
	 * @param array $headers Headers collected so far, keyed by header name.
	 *
	 * @return array The same headers with `X-Robots-Tag` set.
	 */
	public static function _HTTP_header( array $headers ): array {
		$headers['X-Robots-Tag'] = 'noindex, nofollow';

		return $headers;
	}

	/**
	 * Callback for hook `robots_txt`.
	 * Ignores the incoming output and returns rules that disallow everything for all agents.
	 */
	public static function _robots_txt(): string {
		return "User-agent: *\nDisallow: /";
	}

	/**
	 * Callback for hook `wp_robots` (not registered by default, see disable_indexing()).
	 * Adds `<meta name='robots' content='noindex, nofollow' />` HTML meta tag.
	 *
	 * @param array $robots Robots directives keyed by directive name.
	 *
	 * @return array Directives with `noindex` and `nofollow` enabled and `follow` removed.
	 */
	public static function _robots_meta_tag( array $robots ): array {
		$robots['noindex']  = true;
		$robots['nofollow'] = true;

		// `follow` would contradict `nofollow`, so drop it if present.
		unset( $robots['follow'] );

		return $robots;
	}

}
Now simply call the class's init() method anywhere: in a mu-plugin, a plugin, or the theme's functions.php file:
// Registers the indexing guard on the `init` hook; it only takes effect outside the `production` and `local` environments.
WP_Kama_Disable_Dev_Env_Indexing::init();
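IMPORTANT! For this code to work, you need to set the WP_ENVIRONMENT_TYPE constant in the wp-config.php file, and its value should be different in each environment — for example, define( 'WP_ENVIRONMENT_TYPE', 'staging' ); on the staging copy. If the environment type is not set, wp_get_environment_type() falls back to 'production', and the code above does nothing.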