Scraping implementation, now functional

This commit is contained in:
Santiago Parra 2024-12-13 23:39:52 +01:00
parent 332d0465b6
commit b44271753d
5 changed files with 113 additions and 1 deletions

View File

@ -13,6 +13,7 @@ from app.widgets.TicTacToeTab import TicTacToeTab
from app.widgets.TodoTab import TodoTab
from app.widgets.UsageLabels import CPULabel, RAMLabel, BatteryLabel, NetworkLabel
from app.widgets.WeatherTab import WeatherTab
from app.widgets.WebScrapingTab import WebScrapingTab

stop_event = threading.Event()
@ -133,6 +134,11 @@ tic_tac_toe_tab = TicTacToeTab(notebook, stop_event=stop_event)
tic_tac_toe_tab.pack(fill="both", expand=True)
notebook.add(tic_tac_toe_tab, text="Tic Tac Toe")
# Add the WebScrapingTab to the notebook
web_scraping_tab = WebScrapingTab(notebook, stop_event=stop_event)
web_scraping_tab.pack(fill="both", expand=True)
notebook.add(web_scraping_tab, text="Web Scraping")
# Create the chat and music player frames within the right frame
frame_chat = tk.Frame(frame_right, bg="lightgreen")
frame_music_player = tk.Frame(frame_right)
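
Side note (not part of the commit): the wiring in this hunk can be exercised on its own. The sketch below is a minimal test harness under stated assumptions; it reuses the construction pattern shown above (WebScrapingTab(notebook, stop_event=stop_event)) and assumes app.widgets.WebScrapingTab is importable and a local MySQL server with the scraping_db database is reachable, since the tab connects to it in its constructor.

# Minimal standalone sketch (not part of this commit).
# Assumes: app.widgets.WebScrapingTab is importable and a local MySQL server
# with the scraping_db database is running (see create_database below).
import threading
import tkinter as tk
from tkinter import ttk

from app.widgets.WebScrapingTab import WebScrapingTab

stop_event = threading.Event()
root = tk.Tk()
root.title("Web Scraping test window")

notebook = ttk.Notebook(root)
notebook.pack(fill="both", expand=True)

# Same wiring as in main.py: build the tab, pack it, register it in the notebook
web_scraping_tab = WebScrapingTab(notebook, stop_event=stop_event)
web_scraping_tab.pack(fill="both", expand=True)
notebook.add(web_scraping_tab, text="Web Scraping")

root.mainloop()
stop_event.set()  # let the ThreadedTab background thread exit after the window closes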

View File

@ -0,0 +1,105 @@
import tkinter as tk
from tkinter import Frame, Button, Label, Entry, Listbox, StringVar, messagebox
import mysql.connector
import requests
from bs4 import BeautifulSoup
from app.widgets.abc import ThreadedTab
class WebScrapingTab(ThreadedTab):
    def __init__(self, root: Frame | tk.Tk, stop_event, **kwargs):
        # Initialize the attributes needed before calling the base class
        self.url = StringVar()
        self.data = []
        self.conn = None  # The connection is initialized afterwards
        super().__init__(root, stop_event, **kwargs)  # Call the ThreadedTab constructor
        self.conn = self.create_database()  # Create or connect to the database

    def build(self):
        # Main frame
        self.scraping_frame = Frame(self)
        self.scraping_frame.pack(fill="both", expand=True)

        # Input field for URL
        Label(self.scraping_frame, text="Enter URL:", font=("Arial", 12)).pack(pady=5)
        Entry(self.scraping_frame, textvariable=self.url, font=("Arial", 12), width=50).pack(pady=5)

        # Buttons for actions
        Button(self.scraping_frame, text="Scrape", command=self.scrape_website).pack(pady=5)
        Button(self.scraping_frame, text="View Data", command=self.view_data).pack(pady=5)

        # Listbox to display scraped data
        self.data_listbox = Listbox(self.scraping_frame, font=("Arial", 10), width=80, height=20)
        self.data_listbox.pack(pady=10)

    def create_database(self):
        # Connect to the MySQL database
        conn = mysql.connector.connect(
            host="127.0.0.1",
            user="santipy",
            password="1234",
            database="scraping_db"
        )
        cursor = conn.cursor()
        # Create the table if it does not exist
        cursor.execute("""
            CREATE TABLE IF NOT EXISTS scraped_data (
                id INT AUTO_INCREMENT PRIMARY KEY,
                title VARCHAR(255),
                link TEXT
            )
        """)
        conn.commit()
        return conn

    def save_to_database(self):
        # Insert the scraped (title, link) pairs into the database
        cursor = self.conn.cursor()
        query = "INSERT INTO scraped_data (title, link) VALUES (%s, %s)"
        cursor.executemany(query, self.data)
        self.conn.commit()

    def scrape_website(self):
        url = self.url.get()
        if not url:
            messagebox.showwarning("Warning", "Please enter a URL.")
            return
        try:
            response = requests.get(url)
            response.raise_for_status()
        except requests.RequestException as e:
            messagebox.showerror("Error", f"Failed to fetch URL: {e}")
            return

        soup = BeautifulSoup(response.text, "html.parser")
        items = soup.select("h2 a")  # Modify selector based on website structure
        self.data = [(item.get_text(strip=True), item.get("href")) for item in items]

        if self.data:
            self.save_to_database()
            messagebox.showinfo("Success", f"Scraped {len(self.data)} items and saved to database.")
        else:
            messagebox.showinfo("No Data", "No data found on the page.")

        self.update_listbox()

    def update_listbox(self):
        # Refresh the listbox with the items scraped in this session
        self.data_listbox.delete(0, "end")
        for title, link in self.data:
            self.data_listbox.insert("end", f"Title: {title} | Link: {link}")

    def view_data(self):
        # Show everything stored in the database so far
        cursor = self.conn.cursor()
        cursor.execute("SELECT title, link FROM scraped_data")
        rows = cursor.fetchall()
        self.data_listbox.delete(0, "end")
        for title, link in rows:
            self.data_listbox.insert("end", f"Title: {title} | Link: {link}")

    def task(self):
        # Placeholder for any background task
        pass
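
For reference (not part of the commit): create_database() assumes the scraping_db database and the santipy account already exist on the local MySQL server; it only creates the scraped_data table. Below is a one-off provisioning sketch, with the administrator login as a placeholder and only the database name, user, and password taken from the code above.

# One-off setup sketch (not part of this commit). The admin login is a placeholder;
# only scraping_db / santipy / 1234 come from create_database() above.
import mysql.connector

admin = mysql.connector.connect(host="127.0.0.1", user="root", password="<admin-password>")
cursor = admin.cursor()
cursor.execute("CREATE DATABASE IF NOT EXISTS scraping_db")
cursor.execute("CREATE USER IF NOT EXISTS 'santipy'@'localhost' IDENTIFIED BY '1234'")
cursor.execute("GRANT ALL PRIVILEGES ON scraping_db.* TO 'santipy'@'localhost'")
cursor.execute("FLUSH PRIVILEGES")
cursor.close()
admin.close()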

View File

@ -1,4 +1,5 @@
from .ClockLabel import ClockLabel
from .UsageLabels import CPULabel, RAMLabel
from .WebScrapingTab import WebScrapingTab

__all__ = ['ClockLabel', 'CPULabel', 'RAMLabel', 'WebScrapingTab']

Binary file not shown.