Source Code:
import requests
from bs4 import BeautifulSoup
from urllib.parse import urlparse
import re
from collections import Counter
import sys
class SEOAnalyzer:
    def __init__(self, url):
        self.url = url
        self.soup = None
        self.content = None
        self.results = {
            'score': 0,
            'checks': {},
            'suggestions': []
        }

    def fetch_url(self):
        """Fetch the URL content"""
        try:
            response = requests.get(self.url, timeout=10)
            response.raise_for_status()
            self.content = response.text
            self.soup = BeautifulSoup(self.content, 'html.parser')
            return True
        except requests.exceptions.RequestException as e:
            print(f"Error fetching URL: {e}")
            return False

    def check_title(self):
        """Check if title exists and is appropriate length"""
        title = self.soup.find('title')
        if title:
            title_text = title.get_text().strip()
            length = len(title_text)
            self.results['checks']['title'] = {
                'exists': True,
                'length': length,
                'content': title_text
            }
            if length == 0:
                self.results['suggestions'].append("Title tag is empty")
            elif length < 30:
                self.results['suggestions'].append(f"Title might be too short ({length} chars). Aim for 50-60 characters.")
            elif length > 60:
                self.results['suggestions'].append(f"Title might be too long ({length} chars). Aim for 50-60 characters.")
        else:
            self.results['checks']['title'] = {'exists': False}
            self.results['suggestions'].append("Missing title tag")

    def check_meta_description(self):
        """Check if meta description exists and is appropriate length"""
        meta_desc = self.soup.find('meta', attrs={'name': 'description'})
        if meta_desc and meta_desc.get('content'):
            desc = meta_desc['content'].strip()
            length = len(desc)
            self.results['checks']['meta_description'] = {
                'exists': True,
                'length': length,
                'content': desc
            }
            if length == 0:
                self.results['suggestions'].append("Meta description is empty")
            elif length < 120:
                self.results['suggestions'].append(f"Meta description might be too short ({length} chars). Aim for 120-155 characters.")
            elif length > 155:
                self.results['suggestions'].append(f"Meta description might be too long ({length} chars). Aim for 120-155 characters.")
        else:
            self.results['checks']['meta_description'] = {'exists': False}
            self.results['suggestions'].append("Missing meta description")

    def check_heading_structure(self):
        """Check heading structure (h1, h2, etc.)"""
        headings = {}
        for i in range(1, 7):
            tag = f'h{i}'
            elements = self.soup.find_all(tag)
            headings[tag] = [el.get_text().strip() for el in elements if el.get_text().strip()]
        self.results['checks']['headings'] = headings
        # Check for multiple H1s
        h1_count = len(headings.get('h1', []))
        if h1_count == 0:
            self.results['suggestions'].append("No H1 tag found")
        elif h1_count > 1:
            self.results['suggestions'].append(f"Multiple H1 tags found ({h1_count}). Ideally, have only one H1 per page.")

    def check_images(self):
        """Check images for alt text"""
        images = self.soup.find_all('img')
        images_without_alt = []
        images_with_alt = []
        for img in images:
            if not img.get('alt'):
                images_without_alt.append(img.get('src', 'No src'))
            else:
                images_with_alt.append(img.get('src', 'No src'))
        self.results['checks']['images'] = {
            'total': len(images),
            'without_alt': len(images_without_alt),
            'with_alt': len(images_with_alt)
        }
        if images_without_alt:
            self.results['suggestions'].append(f"{len(images_without_alt)} images without alt text found")

    def check_internal_links(self):
        """Check internal links"""
        parsed_url = urlparse(self.url)
        base_domain = f"{parsed_url.scheme}://{parsed_url.netloc}"
        all_links = self.soup.find_all('a', href=True)
        internal_links = []
        external_links = []
        for link in all_links:
            href = link['href']
            if href.startswith('/') or href.startswith(base_domain) or href.startswith('#') or not href.startswith('http'):
                internal_links.append(href)
            else:
                external_links.append(href)
        self.results['checks']['links'] = {
            'total': len(all_links),
            'internal': len(internal_links),
            'external': len(external_links)
        }

    def check_url_structure(self):
        """Check URL structure"""
        parsed_url = urlparse(self.url)
        self.results['checks']['url'] = {
            'length': len(self.url),
            'has_https': parsed_url.scheme == 'https',
            'has_www': parsed_url.netloc.startswith('www.'),
            'path_depth': len([p for p in parsed_url.path.split('/') if p])
        }
        # Check for URL parameters
        if parsed_url.query:
            self.results['suggestions'].append("URL contains query parameters which might cause duplicate content issues")
        # Check URL length
        if len(self.url) > 100:
            self.results['suggestions'].append(f"URL might be too long ({len(self.url)} characters). Try to keep URLs under 100 characters.")

    def check_content_quality(self):
        """Basic content quality checks"""
        # Remove script and style elements
        for script in self.soup(["script", "style"]):
            script.decompose()
        text = self.soup.get_text()
        words = re.findall(r'\w+', text.lower())
        word_count = len(words)
        # Count word frequency
        word_freq = Counter(words)
        most_common = word_freq.most_common(10)
        self.results['checks']['content'] = {
            'word_count': word_count,
            'common_words': most_common
        }
        if word_count < 300:
            self.results['suggestions'].append(f"Content might be too thin ({word_count} words). Aim for at least 300 words.")

    def calculate_score(self):
        """Calculate an overall SEO score"""
        score = 100
        checks = self.results['checks']
        # Title (10 points)
        if not checks.get('title', {}).get('exists'):
            score -= 10
        else:
            length = checks['title']['length']
            if length < 30 or length > 60:
                score -= 5
        # Meta description (10 points)
        if not checks.get('meta_description', {}).get('exists'):
            score -= 10
        else:
            length = checks['meta_description']['length']
            if length < 120 or length > 155:
                score -= 5
        # Headings (10 points)
        h1_count = len(checks.get('headings', {}).get('h1', []))
        if h1_count == 0:
            score -= 10
        elif h1_count > 1:
            score -= 5
        # Images (10 points)
        images = checks.get('images', {})
        if images.get('without_alt', 0) > 0:
            score -= min(10, images['without_alt'] * 2)
        # URL (10 points)
        url_info = checks.get('url', {})
        if not url_info.get('has_https'):
            score -= 5
        if url_info.get('length', 0) > 100:
            score -= 5
        # Content (20 points)
        content = checks.get('content', {})
        if content.get('word_count', 0) < 300:
            score -= 10
        # Links (10 points)
        links = checks.get('links', {})
        if links.get('total', 0) == 0:
            score -= 10
        elif links.get('internal', 0) == 0:
            score -= 5
        # Mobile responsiveness (20 points) - would need more sophisticated check
        # For now, we'll assume it's okay if we got this far
        self.results['score'] = max(0, score)

    def analyze(self):
        """Run all SEO checks"""
        if not self.fetch_url():
            return False
        self.check_title()
        self.check_meta_description()
        self.check_heading_structure()
        self.check_images()
        self.check_internal_links()
        self.check_url_structure()
        self.check_content_quality()
        self.calculate_score()
        return True

    def print_results(self):
        """Print the results in a readable format"""
        print("\n" + "="*60)
        print(f"SEO ANALYSIS REPORT: {self.url}")
        print("="*60)
        print(f"\nOverall SEO Score: {self.results['score']}/100")
        print("\nDETAILED ANALYSIS:")
        print("-" * 40)
        # Title
        title = self.results['checks'].get('title', {})
        if title.get('exists'):
            print(f"✓ Title Tag: {title.get('content', 'N/A')}")
            print(f" Length: {title.get('length', 0)} characters")
        else:
            print("✗ Title Tag: Missing")
        # Meta Description
        meta = self.results['checks'].get('meta_description', {})
        if meta.get('exists'):
            print(f"✓ Meta Description: {meta.get('content', 'N/A')[:100]}...")
            print(f" Length: {meta.get('length', 0)} characters")
        else:
            print("✗ Meta Description: Missing")
        # Headings
        headings = self.results['checks'].get('headings', {})
        print(f"\nHeadings Structure:")
        for i in range(1, 7):
            tag = f'h{i}'
            count = len(headings.get(tag, []))
            print(f" {tag.upper()}: {count} found")
        # Images
        images = self.results['checks'].get('images', {})
        print(f"\nImages: {images.get('total', 0)} total")
        print(f" With alt text: {images.get('with_alt', 0)}")
        print(f" Without alt text: {images.get('without_alt', 0)}")
        # Links
        links = self.results['checks'].get('links', {})
        print(f"\nLinks: {links.get('total', 0)} total")
        print(f" Internal: {links.get('internal', 0)}")
        print(f" External: {links.get('external', 0)}")
        # URL
        url_info = self.results['checks'].get('url', {})
        print(f"\nURL Analysis:")
        print(f" Length: {url_info.get('length', 0)} characters")
        print(f" HTTPS: {'Yes' if url_info.get('has_https') else 'No'}")
        print(f" Path depth: {url_info.get('path_depth', 0)}")
        # Content
        content = self.results['checks'].get('content', {})
        print(f"\nContent Analysis:")
        print(f" Word count: {content.get('word_count', 0)}")
        print(" Most common words:", ", ".join([f"{word}({count})" for word, count in content.get('common_words', [])]))
        # Suggestions
        if self.results['suggestions']:
            print(f"\nSEO SUGGESTIONS ({len(self.results['suggestions'])}):")
            print("-" * 40)
            for i, suggestion in enumerate(self.results['suggestions'], 1):
                print(f"{i}. {suggestion}")
        else:
            print("\nGreat job! No major SEO issues found.")

def main():
    """Main function to run the SEO analyzer"""
    print("SEO Analyzer Console Application")
    print("This tool analyzes basic SEO factors of a website")
    if len(sys.argv) > 1:
        url = sys.argv[1]
    else:
        url = input("Enter the URL to analyze (include http/https): ").strip()
    if not url.startswith(('http://', 'https://')):
        url = 'https://' + url
    print(f"\nAnalyzing {url}...")
    analyzer = SEOAnalyzer(url)
    if analyzer.analyze():
        analyzer.print_results()
    else:
        print("Failed to analyze the website. Please check the URL and try again.")

if __name__ == "__main__":
    main()
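
A quick note on running the script: it depends on the third-party requests and beautifulsoup4 packages (installable with pip), and the SEOAnalyzer class can also be driven directly from Python instead of through the interactive prompt. A minimal usage sketch, assuming the listing above is saved as seo_analyzer.py (the filename is an assumption, not part of the original post):

# Minimal programmatic usage sketch.
# Assumes the code above is saved as seo_analyzer.py and that
# `pip install requests beautifulsoup4` has already been run.
from seo_analyzer import SEOAnalyzer

analyzer = SEOAnalyzer("https://example.com")    # any reachable URL
if analyzer.analyze():                           # fetches the page and runs every check
    print("Score:", analyzer.results["score"])   # overall score out of 100
    for tip in analyzer.results["suggestions"]:  # plain-text improvement hints
        print("-", tip)
else:
    print("Could not fetch or analyze the page.")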
Output:
SEO Analyzer Console Application
This tool analyzes basic SEO factors of a website
Enter the URL to analyze (include http/https): https://codesphereacademy.in
Analyzing https://codesphereacademy.in...
============================================================
SEO ANALYSIS REPORT: https://codesphereacademy.in
============================================================
Overall SEO Score: 83/100
DETAILED ANALYSIS:
----------------------------------------
✓ Title Tag: CodeSphere Academy
Length: 18 characters
✓ Meta Description: From syntax to success. ✨ We transform beginners into job-ready developers. Learn Python, JavaScript...
Length: 191 characters
Headings Structure:
H1: 4 found
H2: 6 found
H3: 5 found
H4: 16 found
H5: 20 found
H6: 4 found
Images: 20 total
With alt text: 19
Without alt text: 1
Links: 32 total
Internal: 17
External: 15
URL Analysis:
Length: 28 characters
HTTPS: Yes
Path depth: 0
Content Analysis:
Word count: 919
Most common words: and(29), 00(24), to(19), our(18), a(17), webinar(16), in(15), with(12), you(12), of(12)
SEO SUGGESTIONS (4):
----------------------------------------
1. Title might be too short (18 chars). Aim for 50-60 characters.
2. Meta description might be too long (191 chars). Aim for 120-155 characters.
3. Multiple H1 tags found (4). Ideally, have only one H1 per page.
4. 1 images without alt text found
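
The run above flags the short title, the overly long meta description, the extra H1 tags, and the single image missing alt text. New checks slot into the same pattern as the existing ones: write a method that records its findings in self.results['checks'] and appends any advice to self.results['suggestions'], then call it from analyze(). As an illustration only (a hypothetical addition, not part of the original script), a canonical-link check might look like this:

    # Hypothetical extra check, following the same structure as check_title() above.
    # This method is an illustrative assumption, not part of the original script.
    def check_canonical(self):
        """Check for a rel="canonical" link tag"""
        canonical = self.soup.find('link', attrs={'rel': 'canonical'})
        if canonical and canonical.get('href'):
            self.results['checks']['canonical'] = {'exists': True, 'href': canonical['href']}
        else:
            self.results['checks']['canonical'] = {'exists': False}
            self.results['suggestions'].append("Missing canonical link tag")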