Implementation of web scraping, now functional
parent 332d0465b6
commit b44271753d
@@ -13,6 +13,7 @@ from app.widgets.TicTacToeTab import TicTacToeTab
 from app.widgets.TodoTab import TodoTab
 from app.widgets.UsageLabels import CPULabel, RAMLabel, BatteryLabel, NetworkLabel
 from app.widgets.WeatherTab import WeatherTab
+from app.widgets.WebScrapingTab import WebScrapingTab
 
 stop_event = threading.Event()
 
@@ -133,6 +134,11 @@ tic_tac_toe_tab = TicTacToeTab(notebook, stop_event=stop_event)
 tic_tac_toe_tab.pack(fill="both", expand=True)
 notebook.add(tic_tac_toe_tab, text="Tic Tac Toe")
 
+# Add the WebScrapingTab to the notebook
+web_scraping_tab = WebScrapingTab(notebook, stop_event=stop_event)
+web_scraping_tab.pack(fill="both", expand=True)
+notebook.add(web_scraping_tab, text="Web Scraping")
+
 # Create the chat and music player frames within the right frame
 frame_chat = tk.Frame(frame_right, bg="lightgreen")
 frame_music_player = tk.Frame(frame_right)
@@ -0,0 +1,105 @@
+import tkinter as tk
+from tkinter import Frame, Button, Label, Entry, Listbox, StringVar, messagebox
+import mysql.connector
+import requests
+from bs4 import BeautifulSoup
+from app.widgets.abc import ThreadedTab
+
+
+class WebScrapingTab(ThreadedTab):
+
+    def __init__(self, root: Frame | tk.Tk, stop_event, **kwargs):
+        # Initialize the required attributes before calling the base class
+        self.url = StringVar()
+        self.data = []
+        self.conn = None  # The connection is initialized afterwards
+        super().__init__(root, stop_event, **kwargs)  # Call the ThreadedTab constructor
+        self.conn = self.create_database()  # Create or connect to the database
+
+    def build(self):
+        # Main frame
+        self.scraping_frame = Frame(self)
+        self.scraping_frame.pack(fill="both", expand=True)
+
+        # Input field for URL
+        Label(self.scraping_frame, text="Enter URL:", font=("Arial", 12)).pack(pady=5)
+        Entry(self.scraping_frame, textvariable=self.url, font=("Arial", 12), width=50).pack(pady=5)
+
+        # Buttons for actions
+        Button(self.scraping_frame, text="Scrape", command=self.scrape_website).pack(pady=5)
+        Button(self.scraping_frame, text="View Data", command=self.view_data).pack(pady=5)
+
+        # Listbox to display scraped data
+        self.data_listbox = Listbox(self.scraping_frame, font=("Arial", 10), width=80, height=20)
+        self.data_listbox.pack(pady=10)
+
+    def create_database(self):
+        # Connect to the MySQL database
+        conn = mysql.connector.connect(
+            host="127.0.0.1",
+            user="santipy",
+            password="1234",
+            database="scraping_db"
+        )
+        cursor = conn.cursor()
+
+        # Create the table if it does not exist
+        cursor.execute("""
+            CREATE TABLE IF NOT EXISTS scraped_data (
+                id INT AUTO_INCREMENT PRIMARY KEY,
+                title VARCHAR(255),
+                link TEXT
+            )
+        """)
+        conn.commit()
+        return conn
+
+    def save_to_database(self):
+        cursor = self.conn.cursor()
+        query = "INSERT INTO scraped_data (title, link) VALUES (%s, %s)"
+        cursor.executemany(query, self.data)
+        self.conn.commit()
+
+    def scrape_website(self):
+        url = self.url.get()
+        if not url:
+            messagebox.showwarning("Warning", "Please enter a URL.")
+            return
+
+        try:
+            response = requests.get(url)
+            response.raise_for_status()
+        except requests.RequestException as e:
+            messagebox.showerror("Error", f"Failed to fetch URL: {e}")
+            return
+
+        soup = BeautifulSoup(response.text, "html.parser")
+        items = soup.select("h2 a")  # Modify selector based on website structure
+
+        self.data = [(item.get_text(strip=True), item.get("href")) for item in items]
+
+        if self.data:
+            self.save_to_database()
+            messagebox.showinfo("Success", f"Scraped {len(self.data)} items and saved to database.")
+        else:
+            messagebox.showinfo("No Data", "No data found on the page.")
+
+        self.update_listbox()
+
+    def update_listbox(self):
+        self.data_listbox.delete(0, "end")
+        for title, link in self.data:
+            self.data_listbox.insert("end", f"Title: {title} | Link: {link}")
+
+    def view_data(self):
+        cursor = self.conn.cursor()
+        cursor.execute("SELECT title, link FROM scraped_data")
+        rows = cursor.fetchall()
+
+        self.data_listbox.delete(0, "end")
+        for title, link in rows:
+            self.data_listbox.insert("end", f"Title: {title} | Link: {link}")
+
+    def task(self):
+        # Placeholder for any background task
+        pass
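Note: the scraping flow above can be exercised outside Tkinter. The following is a minimal, hypothetical standalone sketch of the same requests + BeautifulSoup steps; the URL is a placeholder and the "h2 a" selector is the same assumption about page structure that the tab makes.

# Standalone sketch of the scraping flow used by WebScrapingTab (no GUI, no MySQL).
import requests
from bs4 import BeautifulSoup

url = "https://example.com/blog"  # placeholder; any page with <h2><a> links
response = requests.get(url, timeout=10)
response.raise_for_status()

soup = BeautifulSoup(response.text, "html.parser")
items = soup.select("h2 a")  # same selector assumption as the tab
data = [(item.get_text(strip=True), item.get("href")) for item in items]

for title, link in data:
    print(f"Title: {title} | Link: {link}")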
@@ -1,4 +1,5 @@
 from .ClockLabel import ClockLabel
 from .UsageLabels import CPULabel, RAMLabel
+from .WebScrapingTab import WebScrapingTab
 
-__all__ = ['ClockLabel', 'CPULabel', 'RAMLabel']
+__all__ = ['ClockLabel', 'CPULabel', 'RAMLabel', 'WebScrapingTab']
Binary file not shown.
Binary file not shown.
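The new tab assumes a MySQL server reachable on 127.0.0.1 with a scraping_db database and a santipy user (see create_database). A one-time setup sketch, assuming an admin/root account and MySQL 5.7+; the admin password is a placeholder and the host grant may need adjusting for your server configuration.

# One-time MySQL setup sketch for what WebScrapingTab expects to exist.
import mysql.connector

admin = mysql.connector.connect(host="127.0.0.1", user="root", password="<admin password>")
cursor = admin.cursor()
cursor.execute("CREATE DATABASE IF NOT EXISTS scraping_db")
cursor.execute("CREATE USER IF NOT EXISTS 'santipy'@'127.0.0.1' IDENTIFIED BY '1234'")
cursor.execute("GRANT ALL PRIVILEGES ON scraping_db.* TO 'santipy'@'127.0.0.1'")
cursor.execute("FLUSH PRIVILEGES")
admin.close()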