SEO Analyzer Console Application in Python

Source Code:
import requests
from bs4 import BeautifulSoup
from urllib.parse import urlparse
import re
from collections import Counter
import sys

class SEOAnalyzer:
    """Analyze basic on-page SEO factors of a single URL.

    Typical usage: construct with a URL, call analyze(), then print_results().
    All findings accumulate in self.results, a dict with keys:
      - 'score': overall score 0-100 (filled in by calculate_score)
      - 'checks': per-factor raw data (title, meta_description, headings, ...)
      - 'suggestions': list of human-readable improvement hints
    """

    def __init__(self, url):
        self.url = url
        self.soup = None      # BeautifulSoup tree; populated by fetch_url()
        self.content = None   # raw HTML text; populated by fetch_url()
        self.results = {
            'score': 0,
            'checks': {},
            'suggestions': []
        }

    def fetch_url(self):
        """Fetch the URL and parse the response into a BeautifulSoup tree.

        Returns:
            True on success; False on any network/HTTP error (the error is
            printed rather than raised, since this is a console tool).
        """
        try:
            response = requests.get(self.url, timeout=10)
            response.raise_for_status()
            self.content = response.text
            self.soup = BeautifulSoup(self.content, 'html.parser')
            return True
        except requests.exceptions.RequestException as e:
            print(f"Error fetching URL: {e}")
            return False

    def check_title(self):
        """Record the <title> tag and flag missing, empty, short or long titles.

        Length guidance (50-60 chars) follows common search-snippet limits.
        """
        title = self.soup.find('title')
        if title:
            title_text = title.get_text().strip()
            length = len(title_text)
            self.results['checks']['title'] = {
                'exists': True,
                'length': length,
                'content': title_text
            }

            if length == 0:
                self.results['suggestions'].append("Title tag is empty")
            elif length < 30:
                self.results['suggestions'].append(f"Title might be too short ({length} chars). Aim for 50-60 characters.")
            elif length > 60:
                self.results['suggestions'].append(f"Title might be too long ({length} chars). Aim for 50-60 characters.")
        else:
            self.results['checks']['title'] = {'exists': False}
            self.results['suggestions'].append("Missing title tag")

    def check_meta_description(self):
        """Record the meta description and flag missing/empty/short/long ones.

        A <meta name="description"> whose content attribute is present but
        empty is reported as existing-but-empty. (Previously the truthiness
        test on the content value lumped content="" in with "missing", which
        made the "Meta description is empty" branch unreachable.)
        """
        meta_desc = self.soup.find('meta', attrs={'name': 'description'})
        if meta_desc is not None and meta_desc.get('content') is not None:
            desc = meta_desc['content'].strip()
            length = len(desc)
            self.results['checks']['meta_description'] = {
                'exists': True,
                'length': length,
                'content': desc
            }

            if length == 0:
                self.results['suggestions'].append("Meta description is empty")
            elif length < 120:
                self.results['suggestions'].append(f"Meta description might be too short ({length} chars). Aim for 120-155 characters.")
            elif length > 155:
                self.results['suggestions'].append(f"Meta description might be too long ({length} chars). Aim for 120-155 characters.")
        else:
            self.results['checks']['meta_description'] = {'exists': False}
            self.results['suggestions'].append("Missing meta description")

    def check_heading_structure(self):
        """Collect the text of all h1-h6 headings and flag H1 problems.

        Stores a dict mapping 'h1'..'h6' to lists of non-empty heading texts.
        Flags both a missing H1 and multiple H1s (one H1 per page is the
        conventional recommendation).
        """
        headings = {}
        for i in range(1, 7):
            tag = f'h{i}'
            elements = self.soup.find_all(tag)
            headings[tag] = [el.get_text().strip() for el in elements if el.get_text().strip()]

        self.results['checks']['headings'] = headings

        # Exactly one H1 is the target; zero or several both get a suggestion.
        h1_count = len(headings.get('h1', []))
        if h1_count == 0:
            self.results['suggestions'].append("No H1 tag found")
        elif h1_count > 1:
            self.results['suggestions'].append(f"Multiple H1 tags found ({h1_count}). Ideally, have only one H1 per page.")

    def check_images(self):
        """Count <img> tags with and without alt text.

        NOTE(review): an empty alt="" is counted as "without alt" here.
        Empty alt is actually valid for purely decorative images, so this
        check may over-report — kept as-is to preserve the scoring behavior.
        """
        images = self.soup.find_all('img')
        images_without_alt = []
        images_with_alt = []

        for img in images:
            if not img.get('alt'):
                images_without_alt.append(img.get('src', 'No src'))
            else:
                images_with_alt.append(img.get('src', 'No src'))

        self.results['checks']['images'] = {
            'total': len(images),
            'without_alt': len(images_without_alt),
            'with_alt': len(images_with_alt)
        }

        if images_without_alt:
            self.results['suggestions'].append(f"{len(images_without_alt)} images without alt text found")

    def check_internal_links(self):
        """Classify every <a href> as internal or external and record counts.

        Internal: relative paths, fragments, and absolute URLs on this page's
        scheme+host. Links with a non-web scheme (mailto:, tel:, javascript:,
        data:, ...) are counted as external, since they do not navigate within
        the site. (Previously they fell through the "not href.startswith('http')"
        test and were miscounted as internal links.)
        """
        parsed_url = urlparse(self.url)
        base_domain = f"{parsed_url.scheme}://{parsed_url.netloc}"

        all_links = self.soup.find_all('a', href=True)
        internal_links = []
        external_links = []

        for link in all_links:
            href = link['href']
            scheme = urlparse(href).scheme
            if scheme and scheme not in ('http', 'https'):
                # mailto:, tel:, javascript:, data:, etc. — not site navigation.
                external_links.append(href)
            elif href.startswith('/') or href.startswith(base_domain) or href.startswith('#') or not href.startswith('http'):
                internal_links.append(href)
            else:
                external_links.append(href)

        self.results['checks']['links'] = {
            'total': len(all_links),
            'internal': len(internal_links),
            'external': len(external_links)
        }

    def check_url_structure(self):
        """Record structural properties of the URL itself.

        Captures length, HTTPS usage, www prefix, and path depth; flags query
        parameters (possible duplicate-content source) and very long URLs.
        """
        parsed_url = urlparse(self.url)
        self.results['checks']['url'] = {
            'length': len(self.url),
            'has_https': parsed_url.scheme == 'https',
            'has_www': parsed_url.netloc.startswith('www.'),
            # Count non-empty path segments, e.g. "/a/b/" -> 2.
            'path_depth': len([p for p in parsed_url.path.split('/') if p])
        }

        # Query strings can produce many URLs for the same content.
        if parsed_url.query:
            self.results['suggestions'].append("URL contains query parameters which might cause duplicate content issues")

        # Long URLs are harder to share and may be truncated in snippets.
        if len(self.url) > 100:
            self.results['suggestions'].append(f"URL might be too long ({len(self.url)} characters). Try to keep URLs under 100 characters.")

    def check_content_quality(self):
        """Count visible words and the ten most common terms.

        Parses a fresh tree from the raw HTML so that stripping <script> and
        <style> does not mutate self.soup. (Previously decompose() was run on
        the shared tree, silently corrupting it for any check run afterwards
        and making the analysis order-dependent.)
        """
        working_soup = BeautifulSoup(self.content, 'html.parser')
        # Script and style bodies are not user-visible content.
        for tag in working_soup(["script", "style"]):
            tag.decompose()

        text = working_soup.get_text()
        words = re.findall(r'\w+', text.lower())
        word_count = len(words)

        # Raw frequency, no stop-word filtering — common words dominate.
        word_freq = Counter(words)
        most_common = word_freq.most_common(10)

        self.results['checks']['content'] = {
            'word_count': word_count,
            'common_words': most_common
        }

        if word_count < 300:
            self.results['suggestions'].append(f"Content might be too thin ({word_count} words). Aim for at least 300 words.")

    def calculate_score(self):
        """Derive the overall score by deducting points per failed check.

        Starts at 100 and subtracts fixed penalties; the result is clamped at
        0 and stored in self.results['score']. Checks that were never run
        (missing keys) are treated as failures via .get() defaults.
        """
        score = 100
        checks = self.results['checks']

        # Title (10 points): -10 missing, -5 bad length.
        if not checks.get('title', {}).get('exists'):
            score -= 10
        else:
            length = checks['title']['length']
            if length < 30 or length > 60:
                score -= 5

        # Meta description (10 points): -10 missing, -5 bad length.
        if not checks.get('meta_description', {}).get('exists'):
            score -= 10
        else:
            length = checks['meta_description']['length']
            if length < 120 or length > 155:
                score -= 5

        # Headings (10 points): -10 no H1, -5 multiple H1s.
        h1_count = len(checks.get('headings', {}).get('h1', []))
        if h1_count == 0:
            score -= 10
        elif h1_count > 1:
            score -= 5

        # Images (10 points): -2 per missing alt, capped at -10.
        images = checks.get('images', {})
        if images.get('without_alt', 0) > 0:
            score -= min(10, images['without_alt'] * 2)

        # URL (10 points): -5 no HTTPS, -5 overly long.
        url_info = checks.get('url', {})
        if not url_info.get('has_https'):
            score -= 5
        if url_info.get('length', 0) > 100:
            score -= 5

        # Content (20 points budgeted): -10 for thin content.
        content = checks.get('content', {})
        if content.get('word_count', 0) < 300:
            score -= 10

        # Links (10 points): -10 no links at all, -5 no internal links.
        links = checks.get('links', {})
        if links.get('total', 0) == 0:
            score -= 10
        elif links.get('internal', 0) == 0:
            score -= 5

        # Mobile responsiveness (20 points) - would need more sophisticated check
        # For now, we'll assume it's okay if we got this far

        self.results['score'] = max(0, score)

    def analyze(self):
        """Run all SEO checks in sequence.

        Returns:
            True if the page was fetched and analyzed; False if the fetch
            failed (in which case no checks are run).
        """
        if not self.fetch_url():
            return False

        self.check_title()
        self.check_meta_description()
        self.check_heading_structure()
        self.check_images()
        self.check_internal_links()
        self.check_url_structure()
        self.check_content_quality()
        self.calculate_score()

        return True

    def print_results(self):
        """Print the accumulated results as a human-readable console report."""
        print("\n" + "="*60)
        print(f"SEO ANALYSIS REPORT: {self.url}")
        print("="*60)

        print(f"\nOverall SEO Score: {self.results['score']}/100")

        print("\nDETAILED ANALYSIS:")
        print("-" * 40)

        # Title
        title = self.results['checks'].get('title', {})
        if title.get('exists'):
            print(f"✓ Title Tag: {title.get('content', 'N/A')}")
            print(f"  Length: {title.get('length', 0)} characters")
        else:
            print("✗ Title Tag: Missing")

        # Meta Description (truncated to 100 chars for display)
        meta = self.results['checks'].get('meta_description', {})
        if meta.get('exists'):
            print(f"✓ Meta Description: {meta.get('content', 'N/A')[:100]}...")
            print(f"  Length: {meta.get('length', 0)} characters")
        else:
            print("✗ Meta Description: Missing")

        # Headings
        headings = self.results['checks'].get('headings', {})
        print(f"\nHeadings Structure:")
        for i in range(1, 7):
            tag = f'h{i}'
            count = len(headings.get(tag, []))
            print(f"  {tag.upper()}: {count} found")

        # Images
        images = self.results['checks'].get('images', {})
        print(f"\nImages: {images.get('total', 0)} total")
        print(f"  With alt text: {images.get('with_alt', 0)}")
        print(f"  Without alt text: {images.get('without_alt', 0)}")

        # Links
        links = self.results['checks'].get('links', {})
        print(f"\nLinks: {links.get('total', 0)} total")
        print(f"  Internal: {links.get('internal', 0)}")
        print(f"  External: {links.get('external', 0)}")

        # URL
        url_info = self.results['checks'].get('url', {})
        print(f"\nURL Analysis:")
        print(f"  Length: {url_info.get('length', 0)} characters")
        print(f"  HTTPS: {'Yes' if url_info.get('has_https') else 'No'}")
        print(f"  Path depth: {url_info.get('path_depth', 0)}")

        # Content
        content = self.results['checks'].get('content', {})
        print(f"\nContent Analysis:")
        print(f"  Word count: {content.get('word_count', 0)}")
        print("  Most common words:", ", ".join([f"{word}({count})" for word, count in content.get('common_words', [])]))

        # Suggestions
        if self.results['suggestions']:
            print(f"\nSEO SUGGESTIONS ({len(self.results['suggestions'])}):")
            print("-" * 40)
            for i, suggestion in enumerate(self.results['suggestions'], 1):
                print(f"{i}. {suggestion}")
        else:
            print(f"\nGreat job! No major SEO issues found.")

def main():
    """Entry point: obtain a URL (argv or prompt), normalize it, run the analyzer."""
    print("SEO Analyzer Console Application")
    print("This tool analyzes basic SEO factors of a website")

    # URL comes from the command line when supplied, otherwise from a prompt.
    if len(sys.argv) > 1:
        target = sys.argv[1]
    else:
        target = input("Enter the URL to analyze (include http/https): ").strip()

    # Default to HTTPS when no scheme was given.
    if not target.startswith(('http://', 'https://')):
        target = 'https://' + target

    print(f"\nAnalyzing {target}...")

    analyzer = SEOAnalyzer(target)
    if not analyzer.analyze():
        print("Failed to analyze the website. Please check the URL and try again.")
        return
    analyzer.print_results()

# Run the CLI only when executed as a script, not when imported as a module.
if __name__ == "__main__":
    main()

Output:

SEO Analyzer Console Application This tool analyzes basic SEO factors of a website Enter the URL to analyze (include http/https): https://codesphereacademy.in Analyzing https://codesphereacademy.in... ============================================================ SEO ANALYSIS REPORT: https://codesphereacademy.in ============================================================ Overall SEO Score: 83/100 DETAILED ANALYSIS: ---------------------------------------- ✓ Title Tag: CodeSphere Academy Length: 18 characters ✓ Meta Description: From syntax to success. ✨ We transform beginners into job-ready developers. Learn Python, JavaScript... Length: 191 characters Headings Structure: H1: 4 found H2: 6 found H3: 5 found H4: 16 found H5: 20 found H6: 4 found Images: 20 total With alt text: 19 Without alt text: 1 Links: 32 total Internal: 17 External: 15 URL Analysis: Length: 28 characters HTTPS: Yes Path depth: 0 Content Analysis: Word count: 919 Most common words: and(29), 00(24), to(19), our(18), a(17), webinar(16), in(15), with(12), you(12), of(12) SEO SUGGESTIONS (4): ---------------------------------------- 1. Title might be too short (18 chars). Aim for 50-60 characters. 2. Meta description might be too long (191 chars). Aim for 120-155 characters. 3. Multiple H1 tags found (4). Ideally, have only one H1 per page. 4. 1 images without alt text found

0 Comments