From b44271753d3889f367b28c44afab24243feaba23 Mon Sep 17 00:00:00 2001 From: Santiago Parra Date: Fri, 13 Dec 2024 23:39:52 +0100 Subject: [PATCH] Implementacion del scraping ya funcional --- app/main.py | 6 + app/widgets/WebScrapingTab.py | 105 ++++++++++++++++++ app/widgets/__init__.py | 3 +- .../WebScrapingTab.cpython-313.pyc | Bin 0 -> 6334 bytes .../__pycache__/__init__.cpython-313.pyc | Bin 296 -> 339 bytes 5 files changed, 113 insertions(+), 1 deletion(-) create mode 100644 app/widgets/WebScrapingTab.py create mode 100644 app/widgets/__pycache__/WebScrapingTab.cpython-313.pyc diff --git a/app/main.py b/app/main.py index 4088df4..fa38148 100644 --- a/app/main.py +++ b/app/main.py @@ -13,6 +13,7 @@ from app.widgets.TicTacToeTab import TicTacToeTab from app.widgets.TodoTab import TodoTab from app.widgets.UsageLabels import CPULabel, RAMLabel, BatteryLabel, NetworkLabel from app.widgets.WeatherTab import WeatherTab +from app.widgets.WebScrapingTab import WebScrapingTab stop_event = threading.Event() @@ -133,6 +134,11 @@ tic_tac_toe_tab = TicTacToeTab(notebook, stop_event=stop_event) tic_tac_toe_tab.pack(fill="both", expand=True) notebook.add(tic_tac_toe_tab, text="Tic Tac Toe") +# Add the WebScrapingTab to the notebook +web_scraping_tab = WebScrapingTab(notebook, stop_event=stop_event) +web_scraping_tab.pack(fill="both", expand=True) +notebook.add(web_scraping_tab, text="Web Scraping") + # Create the chat and music player frames within the right frame frame_chat = tk.Frame(frame_right, bg="lightgreen") frame_music_player = tk.Frame(frame_right) diff --git a/app/widgets/WebScrapingTab.py b/app/widgets/WebScrapingTab.py new file mode 100644 index 0000000..9ad9c8b --- /dev/null +++ b/app/widgets/WebScrapingTab.py @@ -0,0 +1,105 @@ +import tkinter as tk +from tkinter import Frame, Button, Label, Entry, Listbox, StringVar, messagebox +import mysql.connector +import requests +from bs4 import BeautifulSoup +from app.widgets.abc import ThreadedTab + +class 
WebScrapingTab(ThreadedTab): + + def __init__(self, root: Frame | tk.Tk, stop_event, **kwargs): + # Inicializa los atributos necesarios antes de llamar a la clase base + self.url = StringVar() + self.data = [] + self.conn = None # La conexión se inicializa después + super().__init__(root, stop_event, **kwargs) # Llama al constructor de ThreadedTab + self.conn = self.create_database() # Crea o conecta a la base de datos + + def build(self): + # Main frame + self.scraping_frame = Frame(self) + self.scraping_frame.pack(fill="both", expand=True) + + # Input field for URL + Label(self.scraping_frame, text="Enter URL:", font=("Arial", 12)).pack(pady=5) + Entry(self.scraping_frame, textvariable=self.url, font=("Arial", 12), width=50).pack(pady=5) + + # Buttons for actions + Button(self.scraping_frame, text="Scrape", command=self.scrape_website).pack(pady=5) + Button(self.scraping_frame, text="View Data", command=self.view_data).pack(pady=5) + + # Listbox to display scraped data + self.data_listbox = Listbox(self.scraping_frame, font=("Arial", 10), width=80, height=20) + self.data_listbox.pack(pady=10) + + def create_database(self): + # Connect to MySQL database + conn = mysql.connector.connect( + host="127.0.0.1 ", + user="santipy", + password="1234", + database="scraping_db" + ) + cursor = conn.cursor() + + # Crear la tabla si no existe + cursor.execute(""" + CREATE TABLE IF NOT EXISTS scraped_data ( + id INT AUTO_INCREMENT PRIMARY KEY, + title VARCHAR(255), + link TEXT + ) + """) + conn.commit() + return conn + + + def save_to_database(self): + cursor = self.conn.cursor() + query = "INSERT INTO scraped_data (title, link) VALUES (%s, %s)" + cursor.executemany(query, self.data) + self.conn.commit() + + def scrape_website(self): + url = self.url.get() + if not url: + messagebox.showwarning("Warning", "Please enter a URL.") + return + + try: + response = requests.get(url) + response.raise_for_status() + except requests.RequestException as e: + messagebox.showerror("Error", 
f"Failed to fetch URL: {e}") + return + + soup = BeautifulSoup(response.text, "html.parser") + items = soup.select("h2 a") # Modify selector based on website structure + + self.data = [(item.get_text(strip=True), item.get("href")) for item in items] + + if self.data: + self.save_to_database() + messagebox.showinfo("Success", f"Scraped {len(self.data)} items and saved to database.") + else: + messagebox.showinfo("No Data", "No data found on the page.") + + self.update_listbox() + + def update_listbox(self): + self.data_listbox.delete(0, "end") + for title, link in self.data: + self.data_listbox.insert("end", f"Title: {title} | Link: {link}") + + def view_data(self): + cursor = self.conn.cursor() + cursor.execute("SELECT title, link FROM scraped_data") + rows = cursor.fetchall() + + self.data_listbox.delete(0, "end") + for title, link in rows: + self.data_listbox.insert("end", f"Title: {title} | Link: {link}") + + def task(self): + # Placeholder for any background task + pass diff --git a/app/widgets/__init__.py b/app/widgets/__init__.py index 3f69feb..c811d81 100644 --- a/app/widgets/__init__.py +++ b/app/widgets/__init__.py @@ -1,4 +1,5 @@ from .ClockLabel import ClockLabel from .UsageLabels import CPULabel, RAMLabel +from .WebScrapingTab import WebScrapingTab -__all__ = ['ClockLabel', 'CPULabel', 'RAMLabel'] \ No newline at end of file +__all__ = ['ClockLabel', 'CPULabel', 'RAMLabel', 'WebScrapingTab'] diff --git a/app/widgets/__pycache__/WebScrapingTab.cpython-313.pyc b/app/widgets/__pycache__/WebScrapingTab.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1f0acfe3b5f8111814cf2affd1baf50ac0182b43 GIT binary patch literal 6334 zcmbUlTW}NC^{%ult*o`=hYZHXcx?<=fjEKKp(Y`%0TD39X0(!lV4|+2l`Skw%H0(Y zPdd%?qhn9g5}IKk)6T^CX-L1wv@;>ok1(C)qy3VwQ)w=wO{e)N{qg{vWYT{0oRwBK zLL`%1Tj##cp8GiOd%wQEjzIav2NP56UPAtf9jgfC#{CpDE)#_)+z~=KsBK5cu#MV= z?bJTZQ+`;W!mxunhMm+oEK*S};ksy@`Hh>pp|&59hU;m)ImaLI411|pCat8KC_)=i z924FFduct|FvaZF!rLLg<(3(Pr^(2dFTdFE0K 
znn+EJtCZEvYPzmYX)x^PSp7at%^Hb#HmRhu85nSfrm3dJv{*=;jFwg`!SeFtz5)H0 z;lLB-VW~~ws9mv5@DoB%TKhq%{iKtSep{IIbBeuQWp4>l7zI zw;}?R6c@mHr4FD+aZ_)Y?B!@f5AjRPb6lHLqEyYmSzv#6MOcIJG@Ul0*7k5!rW6>6t&3A2RwFgfibz>yP8{kEq`*nWU} zmXiTHSYJk2Q>|I2LTie=#{tWzxodltCK(@74VCfHbP719XEPdQVkDACC5%Xfd85Dx zLyI8Jq^fIvf$_SQj8hOq!s>J*or!2?wUogeGv`z~r88F~5>2YQ9*O991u~S^*uMRH zCc?T#^$A@~8HtHw7oyWDojpd=XSArHPaI67)Z~Pk$xNI}#HKVDdH9n1G8gDNTycHX zxV2H#5dC=oU%o~@Z{GOkbMyS`V$rt_Dp%3hiq##le!+Q5Y`yBsi~ZNeZ;AadQSkNV zeZ5P*C+7K|yY9pHBwAq!bU(h6)Set8xiiN@@*2t#FD> zu@Bm0athZlOYN@}UJ(?>puHv)ry>q=K*Logfr3zD2QX@y5DTkR16`L=H^_f2<%(F< zrmVPO-19J9>?zVW?D(7L)&t#!??AWN`sOkF^(bDYL1`QWMM_rg1aLvAs!Pp#X)4|L z6!ABYGJY~`Oo#k7#>W%MBy(uzGioX}R|keqqjESnvIpjbA(~K=chM{%rlFlT06(2F z?xI|Pwc|5t?84n$*s(JS=g+F}$z)QaTVaq1pcBS4bC^a)^V{ea9Cb$1v$MFGYdoQy zlMjG`!qjG%x{K$!dkg`d%5OKx(CyG>j%h71HEqy7oN!Z=ZRXL-)+`b?%{c5QIzw@D z)PbN00l$i=Mb2rHdcx3{>ntp7+6IbJ%p^1ukz~odFEpUPgma4 zbxpdlH!mJ8h@*LNG#3izurUrAZy{XgKM>y+Z~Q118qbTz3*tmxoXAZ^bJ&O>Det^d z@OJ0D-Pc-g^cS8R&BMPqRuDsZF_e4hR1O;xcSKJGt^G#qqtu6~ym+)A2J>Ptcj9CY z8!rL1j-JH>g`VB{p4~TQb4ob(@(=SHRXolH^Eeyw;)a5_DKBnX+?NwK-4eIaO>pqq zmggGGfaL*LDz)^xrlgt{2Cc^+pW?z?uwrK{N`zPrB4rEPf)yLGI9h{2kOjZM#0l5qUCDl{#A$0qpIu=28KcbIW)9yBp@F?D36YXRQZ{N4dAw$4Dy? 
zlMj!EmmzB#fjcj}Ha+9~uhw?iv{IKNdBa zNX^Khz==>TTK?)mzk><07xY(>%!OVUJYt%new$6^h-Rsd1KN2lngy+MpynqG&}-B$ ziaJgM0Cd!5IH9R)bUk#~rYcRV^5$qQ^y2_P=e)j)KVJB8!Mio@-CAtyC^ohgTiS~) z-77BAux*714R!PUibR^$U1`76{$|I#V+FP5qG!RgLL9Bl4~WC%Tk#NA{YCeJ`}O*w zw|Ut&uAr_iLByykmY}6Lg#&TrYKi;_Ifw?UstlAJq8TMs&0)8e z;9IN71Pr}xbne;1qe>tcLdhAck~32(cF4$aKM2Q2IH1VATl5|B7Txa`R=LC6W&VND zzCh7U(H8+&Tto&k;gzgLFFeFY)}X6rwTO|fgkY?*imBBIgFs4u4!~=q*w}Jq!=(*{ z#;$y0*HYu=c}KC~(7aF-Jq58XFScDBc-Q}qe_7lCcIl#P!Br4j^J42KV%wvfL-S#B z?%!r8^j$a?=6Zn$mKaFxDU=1X)Z!ZC9ALm!c{fEik<~c3mJ#d*6J(hy)1ZB=xjNNo ztq!ISeNR!bTo>hFH-es+{TCeVPigc~zdgT^Yah*hkn@ZW& ztXm@-g;UuTK5X}pXV%D6C*)Zv`;dLAT*tXM*=z44aZb0DZaz-@-cjZ}uF@1_dvndl zk{TpIGUPiD->Dek^+B3TX`0Tp98?oYEhZajIj$MeX*2wj=iJlAY_cz-Qt)zf{PZqa z4ME5!=#UC$zyne(&YVg%8ijm#PAo-uF?p^-hVW`umm$8BQN2yFmSgOjbB?CXfN!o1 z8aOP+(^)`Gr(|PVlQWQ6_xT&mz>V1z}V=9&Kjti(6vZB zO(VLY8d(?#mgWNIqguvDKt}JvPc=Lz#o)3OfjS@&1YgBOSRsO8YsspO;94SnkVwVT z%%0Rz%#+Q)HZ?0Wg#6QVU8r}s6Frko=^EoSNG{<8fC=X58E;l-c)+V+DS5Vj{Hmkc z2x1IdiA+Hah~5d2CzwD_)5Yn9>5IvQ>-4pwOWr;6e6hiI z#eKbmcP*7sX)Y+c$pwA8Zi&)K{$ zjCts1Ztumxi-Sd}p&)hTrLJXZv$>)MImM{o3e{=@Jl{18K+#dNWm;9^;mCZz z6pW6%)KQRn@>0)Y_}bKU?fUT>hi}Gij^}oa{8c)7&rUXOdDKN$*du}De;f8#2G`6J zF!8G@O2x5aoL`F#6$>gTb+*4~284n9w*-r&9=~0f4cpgxK^T3^36&@M+RG3TQ;eP7BXl=ZYtwt)Q1uzsTy-cM#LF~q z;57o9G?%bqV>1#)?W4O8piA&K;tOO7o|NGQvMv&tO~c>UA>z*^v;65L)Q8c_{!%9D^` z!P_OPFF#{OwPZ39@pBYoD)X>ED0v|iMT!8WT^|DQ+}}{ zy7wBqLhrrqX2ExF2QReWlUjvM_xkO^D0i>U3Gj<1n^3<(0GRh;(u5mu8Z!wDGw2*H zgjUylgIV(~A0B}-dOv}Z>)XrxK1%hHkYJ2 zw(uRI^j0H2U%K`e=ucn_WkBB!V8zaH+->6fI~gmGu{;^OP1fBe-cL!#r)1j$Ugfy@ MH%|SBU~LlnKWaG?C(1Atu}xeRqsjvm iUIgqi4C7nf+bz*{+4s#JRP)!jF zh+qX0ewwUckz3rM#fj;uK8Z=GImJ-viI-xOxPfXIfw))zNPJ*sWMsU{pmd)>`w^Ex KBYP1iP!IsAG#W7g