Implementación del scraping ya funcional
This commit is contained in:
parent
332d0465b6
commit
b44271753d
|
@ -13,6 +13,7 @@ from app.widgets.TicTacToeTab import TicTacToeTab
|
||||||
from app.widgets.TodoTab import TodoTab
|
from app.widgets.TodoTab import TodoTab
|
||||||
from app.widgets.UsageLabels import CPULabel, RAMLabel, BatteryLabel, NetworkLabel
|
from app.widgets.UsageLabels import CPULabel, RAMLabel, BatteryLabel, NetworkLabel
|
||||||
from app.widgets.WeatherTab import WeatherTab
|
from app.widgets.WeatherTab import WeatherTab
|
||||||
|
from app.widgets.WebScrapingTab import WebScrapingTab
|
||||||
|
|
||||||
stop_event = threading.Event()
|
stop_event = threading.Event()
|
||||||
|
|
||||||
|
@ -133,6 +134,11 @@ tic_tac_toe_tab = TicTacToeTab(notebook, stop_event=stop_event)
|
||||||
tic_tac_toe_tab.pack(fill="both", expand=True)
|
tic_tac_toe_tab.pack(fill="both", expand=True)
|
||||||
notebook.add(tic_tac_toe_tab, text="Tic Tac Toe")
|
notebook.add(tic_tac_toe_tab, text="Tic Tac Toe")
|
||||||
|
|
||||||
|
# Add the TodoTab to the notebook
|
||||||
|
web_scraping_tab = WebScrapingTab(notebook, stop_event=stop_event)
|
||||||
|
web_scraping_tab.pack(fill="both", expand=True)
|
||||||
|
notebook.add(web_scraping_tab, text="Web Scraping")
|
||||||
|
|
||||||
# Create the chat and music player frames within the right frame
|
# Create the chat and music player frames within the right frame
|
||||||
frame_chat = tk.Frame(frame_right, bg="lightgreen")
|
frame_chat = tk.Frame(frame_right, bg="lightgreen")
|
||||||
frame_music_player = tk.Frame(frame_right)
|
frame_music_player = tk.Frame(frame_right)
|
||||||
|
|
|
@ -0,0 +1,105 @@
|
||||||
|
import tkinter as tk
|
||||||
|
from tkinter import Frame, Button, Label, Entry, Listbox, StringVar, messagebox
|
||||||
|
import mysql.connector
|
||||||
|
import requests
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
from app.widgets.abc import ThreadedTab
|
||||||
|
|
||||||
|
class WebScrapingTab(ThreadedTab):
    """Notebook tab that scrapes title/link pairs from a URL and stores them in MySQL.

    Widgets: a URL entry, "Scrape" / "View Data" buttons, and a listbox showing
    the scraped or stored rows.
    """

    def __init__(self, root: Frame | tk.Tk, stop_event, **kwargs):
        """Initialize state, build widgets via the base class, then open the DB.

        Args:
            root: Parent container (a Frame or the Tk root).
            stop_event: threading.Event used by ThreadedTab to stop background work.
            **kwargs: Forwarded to the ThreadedTab constructor.
        """
        # These attributes must exist before super().__init__ runs, because the
        # base class calls build(), which reads self.url.
        self.url = StringVar()
        self.data = []    # list of (title, link) tuples from the most recent scrape
        self.conn = None  # DB connection is created only after the widgets exist
        super().__init__(root, stop_event, **kwargs)  # ThreadedTab constructor calls build()
        self.conn = self.create_database()  # create/connect to the database

    def build(self):
        """Create the tab's widgets: URL entry, action buttons, and results listbox."""
        # Main frame
        self.scraping_frame = Frame(self)
        self.scraping_frame.pack(fill="both", expand=True)

        # Input field for URL
        Label(self.scraping_frame, text="Enter URL:", font=("Arial", 12)).pack(pady=5)
        Entry(self.scraping_frame, textvariable=self.url, font=("Arial", 12), width=50).pack(pady=5)

        # Buttons for actions
        Button(self.scraping_frame, text="Scrape", command=self.scrape_website).pack(pady=5)
        Button(self.scraping_frame, text="View Data", command=self.view_data).pack(pady=5)

        # Listbox to display scraped data
        self.data_listbox = Listbox(self.scraping_frame, font=("Arial", 10), width=80, height=20)
        self.data_listbox.pack(pady=10)

    def create_database(self):
        """Connect to MySQL, ensure the scraped_data table exists, return the connection.

        Returns:
            An open mysql.connector connection to scraping_db.

        Raises:
            mysql.connector.Error: if the server is unreachable or credentials fail.
        """
        # NOTE(review): credentials are hard-coded; consider moving them to config.
        conn = mysql.connector.connect(
            host="127.0.0.1",  # BUG FIX: original literal had a trailing space ("127.0.0.1 ")
            user="santipy",
            password="1234",
            database="scraping_db",
        )
        cursor = conn.cursor()

        # Create the table if it does not already exist
        cursor.execute("""
            CREATE TABLE IF NOT EXISTS scraped_data (
                id INT AUTO_INCREMENT PRIMARY KEY,
                title VARCHAR(255),
                link TEXT
            )
        """)
        conn.commit()
        cursor.close()  # release the cursor; the connection stays open for later queries
        return conn

    def save_to_database(self):
        """Insert every (title, link) tuple in self.data using a parameterized query."""
        cursor = self.conn.cursor()
        query = "INSERT INTO scraped_data (title, link) VALUES (%s, %s)"
        cursor.executemany(query, self.data)
        self.conn.commit()
        cursor.close()  # avoid leaking server-side cursor resources

    def scrape_website(self):
        """Fetch the entered URL, extract 'h2 a' anchors, persist and display them.

        Shows a warning when the URL field is empty and an error dialog when the
        HTTP request fails; otherwise refreshes the listbox with the new rows.
        """
        url = self.url.get()
        if not url:
            messagebox.showwarning("Warning", "Please enter a URL.")
            return

        try:
            # Timeout keeps the UI thread from hanging forever on a dead host.
            response = requests.get(url, timeout=10)
            response.raise_for_status()
        except requests.RequestException as e:
            messagebox.showerror("Error", f"Failed to fetch URL: {e}")
            return

        soup = BeautifulSoup(response.text, "html.parser")
        items = soup.select("h2 a")  # Modify selector based on website structure

        self.data = [(item.get_text(strip=True), item.get("href")) for item in items]

        if self.data:
            self.save_to_database()
            messagebox.showinfo("Success", f"Scraped {len(self.data)} items and saved to database.")
        else:
            messagebox.showinfo("No Data", "No data found on the page.")

        self.update_listbox()

    def update_listbox(self):
        """Replace the listbox contents with the rows currently in self.data."""
        self.data_listbox.delete(0, "end")
        for title, link in self.data:
            self.data_listbox.insert("end", f"Title: {title} | Link: {link}")

    def view_data(self):
        """Load all previously saved rows from the database into the listbox."""
        cursor = self.conn.cursor()
        cursor.execute("SELECT title, link FROM scraped_data")
        rows = cursor.fetchall()
        cursor.close()

        self.data_listbox.delete(0, "end")
        for title, link in rows:
            self.data_listbox.insert("end", f"Title: {title} | Link: {link}")

    def task(self):
        # Placeholder for the background task required by the ThreadedTab interface.
        pass
|
|
@ -1,4 +1,5 @@
|
||||||
from .ClockLabel import ClockLabel
|
from .ClockLabel import ClockLabel
|
||||||
from .UsageLabels import CPULabel, RAMLabel
|
from .UsageLabels import CPULabel, RAMLabel
|
||||||
|
from .WebScrapingTab import WebScrapingTab
|
||||||
|
|
||||||
__all__ = ['ClockLabel', 'CPULabel', 'RAMLabel']
|
__all__ = ['ClockLabel', 'CPULabel', 'RAMLabel', 'WebScrapingTab']
|
||||||
|
|
Binary file not shown.
Binary file not shown.
Loading…
Reference in New Issue