diff --git a/app/main.py b/app/main.py
index 4088df4..fa38148 100644
--- a/app/main.py
+++ b/app/main.py
@@ -13,6 +13,7 @@ from app.widgets.TicTacToeTab import TicTacToeTab
 from app.widgets.TodoTab import TodoTab
 from app.widgets.UsageLabels import CPULabel, RAMLabel, BatteryLabel, NetworkLabel
 from app.widgets.WeatherTab import WeatherTab
+from app.widgets.WebScrapingTab import WebScrapingTab
 
 stop_event = threading.Event()
 
@@ -133,6 +134,11 @@ tic_tac_toe_tab = TicTacToeTab(notebook, stop_event=stop_event)
 tic_tac_toe_tab.pack(fill="both", expand=True)
 notebook.add(tic_tac_toe_tab, text="Tic Tac Toe")
 
+# Add the WebScrapingTab to the notebook
+web_scraping_tab = WebScrapingTab(notebook, stop_event=stop_event)
+web_scraping_tab.pack(fill="both", expand=True)
+notebook.add(web_scraping_tab, text="Web Scraping")
+
 # Create the chat and music player frames within the right frame
 frame_chat = tk.Frame(frame_right, bg="lightgreen")
 frame_music_player = tk.Frame(frame_right)
diff --git a/app/widgets/WebScrapingTab.py b/app/widgets/WebScrapingTab.py
new file mode 100644
index 0000000..9ad9c8b
--- /dev/null
+++ b/app/widgets/WebScrapingTab.py
@@ -0,0 +1,105 @@
+import tkinter as tk
+from tkinter import Frame, Button, Label, Entry, Listbox, StringVar, messagebox
+import mysql.connector
+import requests
+from bs4 import BeautifulSoup
+
+from app.widgets.abc import ThreadedTab
+
+class WebScrapingTab(ThreadedTab):
+
+    def __init__(self, root: Frame | tk.Tk, stop_event, **kwargs):
+        # Initialise the required attributes before calling the base class
+        self.url = StringVar()
+        self.data = []
+        self.conn = None  # The connection is created afterwards
+        super().__init__(root, stop_event, **kwargs)  # Call the ThreadedTab constructor
+        self.conn = self.create_database()  # Create or connect to the database
+
+    def build(self):
+        # Main frame
+        self.scraping_frame = Frame(self)
+        self.scraping_frame.pack(fill="both", expand=True)
+
+        # Input field for URL
+        Label(self.scraping_frame, text="Enter URL:", font=("Arial", 12)).pack(pady=5)
+        Entry(self.scraping_frame, textvariable=self.url, font=("Arial", 12), width=50).pack(pady=5)
+
+        # Buttons for actions
+        Button(self.scraping_frame, text="Scrape", command=self.scrape_website).pack(pady=5)
+        Button(self.scraping_frame, text="View Data", command=self.view_data).pack(pady=5)
+
+        # Listbox to display scraped data
+        self.data_listbox = Listbox(self.scraping_frame, font=("Arial", 10), width=80, height=20)
+        self.data_listbox.pack(pady=10)
+
+    def create_database(self):
+        # Connect to the MySQL database
+        conn = mysql.connector.connect(
+            host="127.0.0.1",
+            user="santipy",
+            password="1234",
+            database="scraping_db"
+        )
+        cursor = conn.cursor()
+
+        # Create the table if it does not exist
+        cursor.execute("""
+            CREATE TABLE IF NOT EXISTS scraped_data (
+                id INT AUTO_INCREMENT PRIMARY KEY,
+                title VARCHAR(255),
+                link TEXT
+            )
+        """)
+        conn.commit()
+        return conn
+
+    def save_to_database(self):
+        cursor = self.conn.cursor()
+        query = "INSERT INTO scraped_data (title, link) VALUES (%s, %s)"
+        cursor.executemany(query, self.data)
+        self.conn.commit()
+
+    def scrape_website(self):
+        url = self.url.get()
+        if not url:
+            messagebox.showwarning("Warning", "Please enter a URL.")
+            return
+
+        try:
+            response = requests.get(url)
+            response.raise_for_status()
+        except requests.RequestException as e:
+            messagebox.showerror("Error", f"Failed to fetch URL: {e}")
+            return
+
+        soup = BeautifulSoup(response.text, "html.parser")
+        items = soup.select("h2 a")  # Adjust the selector to match the target website's structure
+
+        self.data = [(item.get_text(strip=True), item.get("href")) for item in items]
+
+        if self.data:
+            self.save_to_database()
+            messagebox.showinfo("Success", f"Scraped {len(self.data)} items and saved to database.")
+        else:
+            messagebox.showinfo("No Data", "No data found on the page.")
+
+        self.update_listbox()
+
+    def update_listbox(self):
+        self.data_listbox.delete(0, "end")
+        for title, link in self.data:
+            self.data_listbox.insert("end", f"Title: {title} | Link: {link}")
+
+    def view_data(self):
+        cursor = self.conn.cursor()
+        cursor.execute("SELECT title, link FROM scraped_data")
+        rows = cursor.fetchall()
+
+        self.data_listbox.delete(0, "end")
+        for title, link in rows:
+            self.data_listbox.insert("end", f"Title: {title} | Link: {link}")
+
+    def task(self):
+        # Placeholder for any background task
+        pass
diff --git a/app/widgets/__init__.py b/app/widgets/__init__.py
index 3f69feb..c811d81 100644
--- a/app/widgets/__init__.py
+++ b/app/widgets/__init__.py
@@ -1,4 +1,5 @@
 from .ClockLabel import ClockLabel
 from .UsageLabels import CPULabel, RAMLabel
+from .WebScrapingTab import WebScrapingTab
 
-__all__ = ['ClockLabel', 'CPULabel', 'RAMLabel']
\ No newline at end of file
+__all__ = ['ClockLabel', 'CPULabel', 'RAMLabel', 'WebScrapingTab']
diff --git a/app/widgets/__pycache__/WebScrapingTab.cpython-313.pyc b/app/widgets/__pycache__/WebScrapingTab.cpython-313.pyc
new file mode 100644
index 0000000..1f0acfe
Binary files /dev/null and b/app/widgets/__pycache__/WebScrapingTab.cpython-313.pyc differ
diff --git a/app/widgets/__pycache__/__init__.cpython-313.pyc b/app/widgets/__pycache__/__init__.cpython-313.pyc
index 3e32371..a95a39b 100644
Binary files a/app/widgets/__pycache__/__init__.cpython-313.pyc and b/app/widgets/__pycache__/__init__.cpython-313.pyc differ
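
Review note: `create_database` hardcodes the MySQL credentials (`user="santipy"`, `password="1234"`) in source. A minimal sketch of reading them from the environment instead; the `SCRAPING_DB_*` variable names and the `connect_scraping_db` helper below are placeholders for illustration, not something the repo defines:

```python
import os

import mysql.connector


def connect_scraping_db():
    """Open the scraping database with credentials taken from the environment."""
    return mysql.connector.connect(
        # SCRAPING_DB_* are hypothetical variable names, not part of the repo.
        host=os.environ.get("SCRAPING_DB_HOST", "127.0.0.1"),
        user=os.environ["SCRAPING_DB_USER"],
        password=os.environ["SCRAPING_DB_PASSWORD"],
        database=os.environ.get("SCRAPING_DB_NAME", "scraping_db"),
    )
```

This keeps secrets out of version control and lets each developer point the tab at their own database without editing the widget.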
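Review note: `scrape_website` calls `requests.get` directly in the button callback, so the Tk main loop blocks and the window freezes while a slow page loads. Tkinter widgets are not thread-safe, so one common pattern is to fetch on a worker thread and hand results back through a `queue.Queue` polled with `after`; the repo's `ThreadedTab.task` hook may be intended for this kind of work, but the sketch below uses only the standard library, and the `start_fetch`/`poll_results` names are illustrative, not part of the repo:

```python
import queue
import threading
import tkinter as tk

import requests
from bs4 import BeautifulSoup

results: queue.Queue = queue.Queue()  # worker thread -> UI thread hand-off


def start_fetch(url: str) -> None:
    """Fetch and parse the page on a daemon thread so the UI stays responsive."""
    def worker():
        try:
            response = requests.get(url, timeout=10)
            response.raise_for_status()
            soup = BeautifulSoup(response.text, "html.parser")
            results.put([(a.get_text(strip=True), a.get("href") or "")
                         for a in soup.select("h2 a")])
        except requests.RequestException as exc:
            results.put(exc)  # surface the error to the UI side

    threading.Thread(target=worker, daemon=True).start()


def poll_results(root: tk.Misc, listbox: tk.Listbox) -> None:
    """Drain the queue from the Tk main loop; only this thread touches widgets."""
    try:
        item = results.get_nowait()
        if isinstance(item, Exception):
            print(f"Fetch failed: {item}")
        else:
            for title, link in item:
                listbox.insert("end", f"Title: {title} | Link: {link}")
    except queue.Empty:
        pass
    root.after(100, poll_results, root, listbox)  # re-arm the 100 ms poll
```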