<?php
/**
 * AEO (AI Engine Optimization) Analyzer
 *
 * @package    Respira_For_WordPress
 * @subpackage Respira_For_WordPress/includes/analyzers
 */

/**
 * Analyzes content for AI search engines (Perplexity, ChatGPT, etc.).
 *
 * @since 1.0.0
 */
class Respira_AEO_Analyzer {

	/**
	 * Run every AEO check against a page and combine the results.
	 *
	 * The individual analyzers each return `issues`, `recommendations` and
	 * `metrics`; these are concatenated/merged in a fixed order and then
	 * scored.
	 *
	 * @since 1.0.0
	 * @param int $page_id Page ID to analyze.
	 * @return array On success: `success`, `score` (0-100), `grade` (A-F),
	 *               `issues`, `recommendations`, `metrics`. When the post
	 *               cannot be loaded: `success` => false and `message`.
	 */
	public function analyze_aeo( $page_id ) {
		$post = get_post( $page_id );
		if ( ! $post ) {
			return array(
				'success' => false,
				'message' => 'Page not found',
			);
		}

		$content = (string) $post->post_content;

		// The order of this list is the order in which issues and
		// recommendations are reported to the caller.
		$analyses = array(
			$this->analyze_structured_data_completeness( $page_id ),
			$this->analyze_content_clarity( $content ),
			$this->analyze_semantic_html( $content ),
			$this->analyze_entities( $content, $post->post_title ),
			$this->analyze_content_depth( $content ),
			$this->analyze_faq_opportunities( $content ),
		);

		$issues          = array();
		$recommendations = array();
		$metrics         = array();

		foreach ( $analyses as $analysis ) {
			$metrics         = array_merge( $metrics, $analysis['metrics'] );
			$issues          = array_merge( $issues, $analysis['issues'] );
			$recommendations = array_merge( $recommendations, $analysis['recommendations'] );
		}

		$score = $this->calculate_aeo_score( $issues, $metrics );

		return array(
			'success'         => true,
			'score'           => $score,
			'grade'           => $this->get_grade_from_score( $score ),
			'issues'          => $issues,
			'recommendations' => $recommendations,
			'metrics'         => $metrics,
		);
	}

	/**
	 * Check only the structured data (schema markup) of a page.
	 *
	 * @since 1.0.0
	 * @param int $page_id Page ID to analyze.
	 * @return array Same shape as analyze_aeo(), limited to the structured
	 *               data checks and scored with the structured data weights.
	 */
	public function check_structured_data( $page_id ) {
		$post = get_post( $page_id );
		if ( ! $post ) {
			return array(
				'success' => false,
				'message' => 'Page not found',
			);
		}

		$analysis = $this->analyze_structured_data_completeness( $page_id );
		$score    = $this->calculate_structured_data_score( $analysis['issues'], $analysis['metrics'] );

		return array(
			'success'         => true,
			'score'           => $score,
			'grade'           => $this->get_grade_from_score( $score ),
			'issues'          => $analysis['issues'],
			'recommendations' => $analysis['recommendations'],
			'metrics'         => $analysis['metrics'],
		);
	}

	/**
	 * Detect JSON-LD / microdata in the post content and list schema types
	 * that look relevant but are not declared.
	 *
	 * @param int $page_id Page ID.
	 * @return array Arrays under the keys `issues`, `recommendations`, `metrics`.
	 */
	private function analyze_structured_data_completeness( $page_id ) {
		$issues          = array();
		$recommendations = array();
		$metrics         = array(
			'has_json_ld'     => false,
			'has_microdata'   => false,
			'schema_types'    => array(),
			'missing_schemas' => array(),
		);

		// Both public entry points verify the post first, but guard anyway so
		// a missing post is analyzed as empty content instead of raising an error.
		$post    = get_post( $page_id );
		$content = ( $post && isset( $post->post_content ) ) ? (string) $post->post_content : '';

		// Check for JSON-LD.
		if ( preg_match( '/<script\b[^>]*type=["\']application\/ld\+json["\'][^>]*>/i', $content ) ) {
			$metrics['has_json_ld']  = true;
			$metrics['schema_types'] = $this->extract_json_ld_types( $content );
		}

		// Check for microdata. Only attributes inside a tag count, so prose
		// that merely mentions "itemprop" is not mistaken for markup.
		if ( preg_match( '/<[a-z][^>]*\s(?:itemscope|itemprop|itemtype)\b/i', $content ) ) {
			$metrics['has_microdata'] = true;
		}

		if ( ! $metrics['has_json_ld'] && ! $metrics['has_microdata'] ) {
			$issues[] = array(
				'type'     => 'warning',
				'message'  => 'No structured data found',
				'severity' => 'high',
				'fix'      => 'Add JSON-LD schema markup for better AI understanding',
			);

			$recommendations[] = array(
				'priority' => 'high',
				'action'   => 'Add structured data',
				'details'  => 'Implement JSON-LD schema markup (Article, Organization, or relevant type)',
			);
		}

		// Compare the declared types with the ones suggested for this content.
		$suggested_schemas = $this->get_suggested_schemas( get_post_type( $page_id ), $content );

		foreach ( $suggested_schemas as $schema ) {
			if ( ! in_array( $schema, $metrics['schema_types'], true ) ) {
				$metrics['missing_schemas'][] = $schema;
			}
		}

		if ( ! empty( $metrics['missing_schemas'] ) ) {
			$recommendations[] = array(
				'priority' => 'medium',
				'action'   => 'Add recommended schema types',
				'details'  => 'Consider adding: ' . implode( ', ', $metrics['missing_schemas'] ),
			);
		}

		return array(
			'issues'          => $issues,
			'recommendations' => $recommendations,
			'metrics'         => $metrics,
		);
	}

	/**
	 * List the distinct `@type` values declared in JSON-LD script blocks.
	 *
	 * Each block is decoded as JSON so that array-valued and nested `@type`
	 * entries are found. A block that is not valid JSON is scanned with a
	 * regular expression instead. If no complete script block is found
	 * (for example the closing tag is missing) the whole content is scanned.
	 *
	 * @param string $content Page content.
	 * @return array Sequential list of unique type names, in document order.
	 */
	private function extract_json_ld_types( $content ) {
		$found = preg_match_all(
			'/<script\b[^>]*type=["\']application\/ld\+json["\'][^>]*>(.*?)<\/script\s*>/is',
			$content,
			$blocks
		);
		$payloads = $found ? $blocks[1] : array( $content );

		$types = array();
		foreach ( $payloads as $payload ) {
			$decoded = json_decode( $payload, true );
			if ( is_array( $decoded ) ) {
				$types = array_merge( $types, $this->collect_schema_types( $decoded ) );
			} elseif ( preg_match_all( '/"@type"\s*:\s*"([^"]+)"/', $payload, $type_matches ) ) {
				$types = array_merge( $types, $type_matches[1] );
			}
		}

		// array_unique() keeps the original keys; reindex so the result is a
		// true list (and serialises as a JSON array, not an object).
		return array_values( array_unique( $types ) );
	}

	/**
	 * Recursively gather every `@type` value from decoded JSON-LD data.
	 *
	 * @param mixed $node Decoded JSON value (array or scalar).
	 * @return array List of type names, duplicates included.
	 */
	private function collect_schema_types( $node ) {
		$types = array();
		if ( ! is_array( $node ) ) {
			return $types;
		}

		foreach ( $node as $key => $value ) {
			if ( '@type' === $key ) {
				// `@type` may be a single string or a list of strings.
				foreach ( (array) $value as $type ) {
					if ( is_string( $type ) && '' !== $type ) {
						$types[] = $type;
					}
				}
			} elseif ( is_array( $value ) ) {
				$types = array_merge( $types, $this->collect_schema_types( $value ) );
			}
		}

		return $types;
	}

	/**
	 * Reduce HTML content to its visible text.
	 *
	 * strip_tags() removes the <script>/<style> tags but keeps the text
	 * between them, so those elements are dropped together with their bodies
	 * first. Otherwise embedded JSON-LD or CSS would be counted as prose.
	 *
	 * @param string $content Page content.
	 * @return string Content without markup.
	 */
	private function get_plain_text( $content ) {
		$content  = (string) $content;
		$stripped = preg_replace( '@<(script|style)\b[^>]*>.*?</\1\s*>@si', ' ', $content );
		if ( null === $stripped ) {
			// The regex engine gave up (e.g. backtrack limit); use the raw content.
			$stripped = $content;
		}

		return strip_tags( $stripped );
	}

	/**
	 * Count words in plain text, including non-ASCII alphabets.
	 *
	 * A word is a letter followed by letters, combining marks, apostrophes or
	 * hyphens. Scripts written without spaces between words are still counted
	 * per run of letters. Falls back to str_word_count() when the text cannot
	 * be processed as UTF-8.
	 *
	 * @param string $text Plain text.
	 * @return int Number of words.
	 */
	private function count_words( $text ) {
		$text  = (string) $text;
		$count = preg_match_all( '/\p{L}[\p{L}\p{M}\x{2019}\'-]*/u', $text );
		if ( false === $count ) {
			return str_word_count( $text );
		}

		return $count;
	}

	/**
	 * Analyze how clearly the content is structured for machine parsing:
	 * headings, lists, tables and average paragraph length.
	 *
	 * @param string $content Page content.
	 * @return array Arrays under the keys `issues`, `recommendations`, `metrics`.
	 */
	private function analyze_content_clarity( $content ) {
		$issues          = array();
		$recommendations = array();
		$metrics         = array();

		$text = $this->get_plain_text( $content );

		// Check for clear sections.
		$heading_count = (int) preg_match_all( '/<h[1-6]\b[^>]*>/i', $content );

		$metrics['has_clear_structure'] = $heading_count > 0;
		$metrics['heading_count']       = $heading_count;

		if ( 0 === $heading_count ) {
			$issues[] = array(
				'type'     => 'warning',
				'message'  => 'No headings found for content structure',
				'severity' => 'medium',
				'fix'      => 'Add headings (H2, H3, etc.) to organize content into clear sections',
			);
		}

		// Check for lists (AI-friendly format).
		$list_count            = (int) preg_match_all( '/<(?:ul|ol)\b[^>]*>/i', $content );
		$metrics['list_count'] = $list_count;

		if ( 0 === $list_count && $this->count_words( $text ) > 500 ) {
			$recommendations[] = array(
				'priority' => 'low',
				'action'   => 'Add lists for better clarity',
				'details'  => 'Use bullet points or numbered lists to make information easier to parse',
			);
		}

		// Check for tables (data presentation).
		$metrics['table_count'] = (int) preg_match_all( '/<table\b[^>]*>/i', $content );

		// Average paragraph length in words. Paragraphs are separated by blank
		// lines; empty fragments are skipped so they do not lower the average.
		$fragments  = preg_split( '/\n\s*\n/', $text );
		$paragraphs = array_filter( array_map( 'trim', false === $fragments ? array() : $fragments ), 'strlen' );

		$avg_paragraph_length = 0;
		if ( count( $paragraphs ) > 0 ) {
			$total_words = 0;
			foreach ( $paragraphs as $paragraph ) {
				$total_words += $this->count_words( $paragraph );
			}
			$avg_paragraph_length = $total_words / count( $paragraphs );
		}
		$metrics['avg_paragraph_length'] = (int) round( $avg_paragraph_length );

		return array(
			'issues'          => $issues,
			'recommendations' => $recommendations,
			'metrics'         => $metrics,
		);
	}

	/**
	 * Record which semantic HTML5 sectioning elements appear in the content.
	 *
	 * @param string $content Page content.
	 * @return array Arrays under the keys `issues`, `recommendations`, `metrics`.
	 */
	private function analyze_semantic_html( $content ) {
		$issues          = array();
		$recommendations = array();
		$metrics         = array();

		$semantic_tags = array( 'article', 'section', 'aside', 'nav', 'header', 'footer' );

		$semantic_usage_count = 0;
		foreach ( $semantic_tags as $tag ) {
			// \b stops e.g. <navbar> from being counted as <nav>.
			$is_used = 1 === preg_match( '/<' . $tag . '\b[^>]*>/i', $content );

			$metrics[ 'uses_' . $tag ] = $is_used;
			if ( $is_used ) {
				$semantic_usage_count++;
			}
		}

		$metrics['semantic_tag_count'] = $semantic_usage_count;

		if ( 0 === $semantic_usage_count ) {
			$recommendations[] = array(
				'priority' => 'low',
				'action'   => 'Use semantic HTML5 elements',
				'details'  => 'Replace generic divs with semantic tags like <article>, <section>, <header>',
			);
		}

		return array(
			'issues'          => $issues,
			'recommendations' => $recommendations,
			'metrics'         => $metrics,
		);
	}

	/**
	 * Estimate how many named entities the text mentions and whether entity
	 * schema (Organization, Person, Product) is present.
	 *
	 * Entity detection is a heuristic: runs of capitalized ASCII words, which
	 * also picks up ordinary words at the start of a sentence.
	 *
	 * @param string $content Page content.
	 * @param string $title   Page title. Currently not used by the analysis;
	 *                        kept so the call signature stays stable.
	 * @return array Arrays under the keys `issues`, `recommendations`, `metrics`.
	 */
	private function analyze_entities( $content, $title ) {
		$issues          = array();
		$recommendations = array();
		$metrics         = array();

		$text = $this->get_plain_text( $content );

		// Capitalized word sequences are treated as potential entities.
		preg_match_all( '/\b[A-Z][a-z]+(?:\s+[A-Z][a-z]+)*\b/', $text, $matches );
		$candidates = array_unique( $matches[0] );

		// Drop common sentence openers that are not entities.
		$common_words = array( 'The', 'This', 'That', 'These', 'Those', 'What', 'When', 'Where', 'Why', 'How' );
		$candidates   = array_diff( $candidates, $common_words );

		$metrics['potential_entities'] = count( $candidates );

		// Stored as booleans, like the other has_* metrics.
		$has_org_markup    = 1 === preg_match( '/"@type"\s*:\s*"Organization"/i', $content );
		$has_person_markup = 1 === preg_match( '/"@type"\s*:\s*"Person"/i', $content );

		$metrics['has_organization_schema'] = $has_org_markup;
		$metrics['has_person_schema']       = $has_person_markup;
		$metrics['has_product_schema']      = 1 === preg_match( '/"@type"\s*:\s*"Product"/i', $content );

		if ( $metrics['potential_entities'] > 3 && ! $has_org_markup && ! $has_person_markup ) {
			$recommendations[] = array(
				'priority' => 'medium',
				'action'   => 'Add entity markup',
				'details'  => 'Consider adding Organization or Person schema for mentioned entities',
			);
		}

		return array(
			'issues'          => $issues,
			'recommendations' => $recommendations,
			'metrics'         => $metrics,
		);
	}

	/**
	 * Analyze content depth: word count, number of H2 sections and
	 * presence of images or embedded video/iframes.
	 *
	 * @param string $content Page content.
	 * @return array Arrays under the keys `issues`, `recommendations`, `metrics`.
	 */
	private function analyze_content_depth( $content ) {
		$issues          = array();
		$recommendations = array();
		$metrics         = array();

		$word_count    = $this->count_words( $this->get_plain_text( $content ) );
		$section_count = (int) preg_match_all( '/<h2\b[^>]*>/i', $content );
		$image_count   = (int) preg_match_all( '/<img\b[^>]*>/i', $content );
		$video_count   = (int) preg_match_all( '/<(?:video|iframe)\b[^>]*>/i', $content );

		$metrics['word_count']     = $word_count;
		$metrics['section_count']  = $section_count;
		$metrics['image_count']    = $image_count;
		$metrics['video_count']    = $video_count;
		$metrics['has_multimedia'] = ( $image_count > 0 || $video_count > 0 );

		if ( $word_count < 500 ) {
			$issues[] = array(
				'type'     => 'warning',
				'message'  => 'Content may lack depth',
				'severity' => 'medium',
				'fix'      => sprintf( 'Expand content for more comprehensive coverage (current: %d words)', $word_count ),
			);
		}

		if ( $section_count < 3 && $word_count > 500 ) {
			$recommendations[] = array(
				'priority' => 'low',
				'action'   => 'Break content into more sections',
				'details'  => 'Add more H2 headings to organize content into distinct topics',
			);
		}

		if ( ! $metrics['has_multimedia'] && $word_count > 800 ) {
			$recommendations[] = array(
				'priority' => 'low',
				'action'   => 'Add multimedia content',
				'details'  => 'Include relevant images or videos to enhance understanding',
			);
		}

		return array(
			'issues'          => $issues,
			'recommendations' => $recommendations,
			'metrics'         => $metrics,
		);
	}

	/**
	 * Look for question-style content that could be marked up as FAQPage.
	 *
	 * @param string $content Page content.
	 * @return array Arrays under the keys `issues`, `recommendations`, `metrics`.
	 */
	private function analyze_faq_opportunities( $content ) {
		$issues          = array();
		$recommendations = array();
		$metrics         = array(
			'has_faq_schema'    => false,
			'potential_faqs'    => 0,
			'question_patterns' => 0,
		);

		// Check for existing FAQ schema.
		if ( preg_match( '/"@type"\s*:\s*"FAQPage"/i', $content ) ) {
			$metrics['has_faq_schema'] = true;
		}

		// Count questions in the text. One combined pattern is used so that a
		// sentence containing several question words ("How and why ...?") is
		// counted once rather than once per word.
		$text                         = $this->get_plain_text( $content );
		$metrics['question_patterns'] = (int) preg_match_all(
			'/\b(?:what|why|how|when|where|who|which)\b[^.!?]*\?/i',
			$text
		);

		// Count H2-H4 headings that contain a question mark. Inline markup
		// inside the heading (<strong>, <a>, ...) is removed before checking.
		if ( preg_match_all( '/<h([2-4])\b[^>]*>(.*?)<\/h\1\s*>/is', $content, $headings ) ) {
			foreach ( $headings[2] as $heading_html ) {
				if ( false !== strpos( strip_tags( $heading_html ), '?' ) ) {
					$metrics['potential_faqs']++;
				}
			}
		}

		if ( ! $metrics['has_faq_schema'] ) {
			if ( $metrics['potential_faqs'] >= 2 ) {
				$recommendations[] = array(
					'priority' => 'high',
					'action'   => 'Add FAQ schema markup',
					'details'  => sprintf( 'Found %d question-style headings - add FAQPage schema for better AI visibility', $metrics['potential_faqs'] ),
				);
			} elseif ( $metrics['question_patterns'] >= 3 ) {
				$recommendations[] = array(
					'priority' => 'medium',
					'action'   => 'Consider FAQ schema',
					'details'  => sprintf( 'Found %d questions in content - consider adding FAQ schema', $metrics['question_patterns'] ),
				);
			}
		}

		return array(
			'issues'          => $issues,
			'recommendations' => $recommendations,
			'metrics'         => $metrics,
		);
	}

	/**
	 * Suggest schema types from the post type and keyword cues in the text.
	 *
	 * The cues are matched against the visible text only, so characters inside
	 * markup (such as `?` in a link's query string) do not trigger a suggestion.
	 *
	 * @param string $post_type Post type.
	 * @param string $content   Content.
	 * @return array Suggested schema types.
	 */
	private function get_suggested_schemas( $post_type, $content ) {
		$schemas = array();
		$text    = $this->get_plain_text( $content );

		// Article for blog posts and pages.
		if ( in_array( $post_type, array( 'post', 'page' ), true ) ) {
			$schemas[] = 'Article';
		}

		// Any question in the text hints at FAQ content.
		if ( false !== strpos( $text, '?' ) ) {
			$schemas[] = 'FAQPage';
		}

		// Check for how-to patterns.
		if ( preg_match( '/step\s+\d+|how\s+to/i', $text ) ) {
			$schemas[] = 'HowTo';
		}

		// Check for product patterns.
		if ( preg_match( '/price|buy|purchase|\$/i', $text ) ) {
			$schemas[] = 'Product';
		}

		return $schemas;
	}

	/**
	 * Subtract the penalty for each issue's severity from a score.
	 *
	 * Issues with a missing or unlisted severity leave the score unchanged.
	 *
	 * @param int   $score     Starting score.
	 * @param array $issues    Issues found.
	 * @param array $penalties Map of severity name => points to deduct.
	 * @return int Score after deductions (not clamped).
	 */
	private function deduct_issue_penalties( $score, $issues, $penalties ) {
		foreach ( $issues as $issue ) {
			$severity = isset( $issue['severity'] ) ? $issue['severity'] : '';
			if ( isset( $penalties[ $severity ] ) ) {
				$score -= $penalties[ $severity ];
			}
		}

		return $score;
	}

	/**
	 * Calculate the overall AEO score.
	 *
	 * Starts at 100, deducts per issue severity, adds bonuses for good
	 * practices and clamps to 0-100. Because of the clamp, bonuses can only
	 * offset deductions.
	 *
	 * @param array $issues  Issues found.
	 * @param array $metrics Metrics collected.
	 * @return int Score (0-100).
	 */
	private function calculate_aeo_score( $issues, $metrics ) {
		$score = $this->deduct_issue_penalties(
			100,
			$issues,
			array(
				'critical' => 25,
				'high'     => 15,
				'medium'   => 10,
				'low'      => 5,
			)
		);

		// Bonus points for good practices.
		if ( ! empty( $metrics['has_json_ld'] ) ) {
			$score += 10;
		}

		if ( ! empty( $metrics['has_clear_structure'] ) ) {
			$score += 5;
		}

		if ( isset( $metrics['semantic_tag_count'] ) && $metrics['semantic_tag_count'] >= 3 ) {
			$score += 5;
		}

		if ( ! empty( $metrics['has_faq_schema'] ) ) {
			$score += 5;
		}

		return max( 0, min( 100, $score ) );
	}

	/**
	 * Calculate the structured data score.
	 *
	 * Same scheme as the AEO score but with heavier penalties and bonuses
	 * tied to JSON-LD presence and the number of declared schema types.
	 *
	 * @param array $issues  Issues found.
	 * @param array $metrics Metrics collected.
	 * @return int Score (0-100).
	 */
	private function calculate_structured_data_score( $issues, $metrics ) {
		$score = $this->deduct_issue_penalties(
			100,
			$issues,
			array(
				'critical' => 30,
				'high'     => 20,
				'medium'   => 10,
				'low'      => 5,
			)
		);

		// Bonus points.
		if ( ! empty( $metrics['has_json_ld'] ) ) {
			$score += 20;
		}

		if ( isset( $metrics['schema_types'] ) && count( $metrics['schema_types'] ) >= 2 ) {
			$score += 10;
		}

		return max( 0, min( 100, $score ) );
	}

	/**
	 * Get letter grade from score.
	 *
	 * @param int $score Score (0-100).
	 * @return string Letter grade: A (>= 90), B (>= 80), C (>= 70), D (>= 60), otherwise F.
	 */
	private function get_grade_from_score( $score ) {
		if ( $score >= 90 ) {
			return 'A';
		}
		if ( $score >= 80 ) {
			return 'B';
		}
		if ( $score >= 70 ) {
			return 'C';
		}
		if ( $score >= 60 ) {
			return 'D';
		}

		return 'F';
	}
}
