ProyectoFinalPython/app/widgets/WebScrapingTab.py

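"""Tkinter tab that scrapes a web page and stores the results in MySQL.

The tab shows a URL entry plus "Scrape" and "View Data" buttons: scraping
collects the ``h2 a`` links from the page, saves them to the
``scraping_db.scraped_data`` table, and lists them in the tab's Listbox.
"""
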
import tkinter as tk
from tkinter import Frame, Button, Label, Entry, Listbox, StringVar, messagebox

import mysql.connector
import requests
from bs4 import BeautifulSoup

from app.widgets.abc import ThreadedTab

class WebScrapingTab(ThreadedTab):
    def __init__(self, root: Frame | tk.Tk, stop_event, **kwargs):
        # Initialize the attributes the base class needs before calling it
        self.url = StringVar()
        self.data = []
        self.conn = None  # The connection is initialized afterwards
        super().__init__(root, stop_event, **kwargs)  # Call the ThreadedTab constructor
        self.conn = self.create_database()  # Create or connect to the database

    def build(self):
        # Main frame
        self.scraping_frame = Frame(self)
        self.scraping_frame.pack(fill="both", expand=True)

        # Input field for URL
        Label(self.scraping_frame, text="Enter URL:", font=("Arial", 12)).pack(pady=5)
        Entry(self.scraping_frame, textvariable=self.url, font=("Arial", 12), width=50).pack(pady=5)

        # Buttons for actions
        Button(self.scraping_frame, text="Scrape", command=self.scrape_website).pack(pady=5)
        Button(self.scraping_frame, text="View Data", command=self.view_data).pack(pady=5)

        # Listbox to display scraped data
        self.data_listbox = Listbox(self.scraping_frame, font=("Arial", 10), width=80, height=20)
        self.data_listbox.pack(pady=10)

    def create_database(self):
        try:
            # Connect without selecting a database yet
            conn = mysql.connector.connect(
                host="127.0.0.1",
                user="santipy",
                password="1234"
            )
            cursor = conn.cursor()
            # Create the database if it does not exist
            cursor.execute(
                "CREATE DATABASE IF NOT EXISTS scraping_db "
                "CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci"
            )
            conn.commit()
            cursor.close()
            conn.close()  # Release the bootstrap connection before reconnecting
            # Connect to the database
            conn = mysql.connector.connect(
                host="127.0.0.1",
                user="santipy",
                password="1234",
                database="scraping_db"
            )
            # Create the table if it does not exist
            cursor = conn.cursor()
            cursor.execute("""
                CREATE TABLE IF NOT EXISTS scraped_data (
                    id INT AUTO_INCREMENT PRIMARY KEY,
                    title VARCHAR(255),
                    link TEXT
                )
            """)
            conn.commit()
            cursor.close()
            return conn
        except mysql.connector.Error as err:
            print(f"Error connecting to or creating the database: {err}")
            messagebox.showerror(
                "Database Error",
                f"Error connecting to or creating the database: {err}"
            )
            return None

    def save_to_database(self):
        # Bail out if the database connection could not be established
        if self.conn is None:
            return False
        cursor = self.conn.cursor()
        query = "INSERT INTO scraped_data (title, link) VALUES (%s, %s)"
        cursor.executemany(query, self.data)
        self.conn.commit()
        cursor.close()
        return True

    def scrape_website(self):
        url = self.url.get()
        if not url:
            messagebox.showwarning("Warning", "Please enter a URL.")
            return
        try:
            response = requests.get(url, timeout=10)  # Avoid hanging the UI indefinitely
            response.raise_for_status()
        except requests.RequestException as e:
            messagebox.showerror("Error", f"Failed to fetch URL: {e}")
            return
        soup = BeautifulSoup(response.text, "html.parser")
        items = soup.select("h2 a")  # Modify selector based on website structure
        self.data = [(item.get_text(strip=True), item.get("href")) for item in items]
        if self.data:
            # Only report success if the rows were actually written
            if self.save_to_database():
                messagebox.showinfo("Success", f"Scraped {len(self.data)} items and saved to database.")
        else:
            messagebox.showinfo("No Data", "No data found on the page.")
        self.update_listbox()

    def update_listbox(self):
        self.data_listbox.delete(0, "end")
        for title, link in self.data:
            self.data_listbox.insert("end", f"Title: {title} | Link: {link}")

    def view_data(self):
        # Bail out if the database connection could not be established
        if self.conn is None:
            messagebox.showerror("Database Error", "No database connection available.")
            return
        cursor = self.conn.cursor()
        cursor.execute("SELECT title, link FROM scraped_data")
        rows = cursor.fetchall()
        cursor.close()
        self.data_listbox.delete(0, "end")
        for title, link in rows:
            self.data_listbox.insert("end", f"Title: {title} | Link: {link}")

    def task(self):
        # Placeholder for any background task
        pass
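
# A minimal manual test harness, assuming ThreadedTab subclasses Frame and can
# be driven with a plain Tk root plus a threading.Event as stop_event; the real
# application presumably mounts this tab inside its own notebook instead.
if __name__ == "__main__":
    import threading

    demo_root = tk.Tk()
    demo_root.title("WebScrapingTab demo")
    stop_event = threading.Event()
    tab = WebScrapingTab(demo_root, stop_event)
    tab.pack(fill="both", expand=True)
    demo_root.mainloop()
    stop_event.set()  # Signal any background thread to stop on exit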