Compare commits

..

No commits in common. "647cef869a31128c04914f7007460878aeea5eea" and "5c052c8d920116188b137073734ccf776924ced5" have entirely different histories.

7 changed files with 66 additions and 70 deletions

View File

@ -8,6 +8,4 @@ def main():
print(f"Error al iniciar la aplicación: {e}") print(f"Error al iniciar la aplicación: {e}")
if __name__ == "__main__": if __name__ == "__main__":
main() main()
#self.tasks["scrapper"].start(self.scrapper.start_scraping)

View File

@ -33,10 +33,6 @@ class Scrapper:
def start_scraping(self): def start_scraping(self):
"""Inicia el proceso de scraping""" """Inicia el proceso de scraping"""
if self.running:
print("El scrapping ya está en ejecución.")
return
self.running = True self.running = True
url = self.get_url_from_ui() url = self.get_url_from_ui()
if url: if url:
@ -46,15 +42,14 @@ class Scrapper:
else: else:
print("No se proporcionó una URL válida.") print("No se proporcionó una URL válida.")
def stop_scraping(self): def stop_scraping(self):
"""Detiene el proceso de scraping""" """Detiene el proceso de scraping"""
print("Deteniendo el proceso de scraping...") self.running = False
# Detener las tareas print("Scrapping detenido. Proceso finalizado.")
self.scraping_task.stop_thread()
self.db_task.stop()
# Inserta un sentinel (None) en la cola para detener el hilo de inserción #Vaciar la cola para detener el hilo de inserción
self.link_queue.put(None) while not self.link_queue.empty():
self.link_queue.get()
# Actualiza la pestaña "Scrapping" con un mensaje # Actualiza la pestaña "Scrapping" con un mensaje
tab = self.ui_instance.tabs["Scrapping"] tab = self.ui_instance.tabs["Scrapping"]
@ -63,39 +58,36 @@ class Scrapper:
text_widget.configure(state="normal") text_widget.configure(state="normal")
text_widget.insert("end", "Scrapping finalizado.\n") text_widget.insert("end", "Scrapping finalizado.\n")
text_widget.see("end") text_widget.see("end")
text_widget.configure(state="disabled") text_widget.configure(state="disabled")
print("Scrapping detenido. Proceso finalizado.")
def scrape_page(self, url): def scrape_page(self, url):
"""Scrapea una web y busca los enlaces""" """Scrapea una web y busca los enlaces"""
if not self.running or url in self.visited_links: if not self.running or url in self.visited_links:
return return
with self.lock:
self.visited_links.add(url)
with self.lock: try:
self.visited_links.add(url) response = requests.get(url, timeout=10)
if response.status_code == 200:
soup = BeautifulSoup(response.text, "html.parser")
links = [urljoin(url, a.get("href")) for a in soup.find_all("a", href=True)]
self.update_ui(url, links)
try: for link in links:
response = requests.get(url, timeout=10) if not self.running:
if response.status_code == 200: break
soup = BeautifulSoup(response.text, "html.parser") self.link_queue.put((url, link))
links = [urljoin(url, a.get("href")) for a in soup.find_all("a", href=True)]
self.update_ui(url, links)
for link in links:
if not self.running:
break
self.link_queue.put((url, link))
# Procesar los enlaces de forma secuencial en lugar de crear nuevos hilos
for link in links:
if not self.running:
break
self.scrape_page(link)
else:
print(f"Error al acceder a {url}: {response.status_code}")
except Exception as e:
print(f"Error al scrapear {url}: {e}")
for link in links:
if not self.running:
break
threading.Thread(target=self.scrape_page, args=(link,), daemon=True).start()
else:
print(f"Error al acceder a {url}: {response.status_code}")
except Exception as e:
print(f"Error al scrapear {url}: {e}")
def update_ui(self, url, links): def update_ui(self, url, links):
"""Actualiza la pestaña 'Scrapping' con los enlaces encontrados""" """Actualiza la pestaña 'Scrapping' con los enlaces encontrados"""
@ -110,16 +102,15 @@ class Scrapper:
text_widget.configure(state="disabled") text_widget.configure(state="disabled")
def insert_links_to_db(self): def insert_links_to_db(self):
"""Inserta los enlaces en la base de datos desde la cola""" """Inserta los enlaces en la base de datos desde la cola"""
while True: while self.running or not self.link_queue.empty():
try: try:
# Obtener un enlace de la cola # Obtener un enlace de la cola
item = self.link_queue.get(timeout=1) if not self.running and self.link_queue.empty():
if item is None: # Si encuentra el sentinel, detiene el hilo break
break parent_url, link = self.link_queue.get(timeout=1) # Espera 1 segundo si la cola está vacía
parent_url, link = item
connection = mysql.connector.connect(**self.db_config) connection = mysql.connector.connect(**self.db_config)
cursor = connection.cursor() cursor = connection.cursor()
cursor.execute("CREATE TABLE IF NOT EXISTS links (id INT AUTO_INCREMENT PRIMARY KEY, url TEXT, parent_url TEXT)") cursor.execute("CREATE TABLE IF NOT EXISTS links (id INT AUTO_INCREMENT PRIMARY KEY, url TEXT, parent_url TEXT)")
@ -129,7 +120,7 @@ class Scrapper:
connection.close() connection.close()
print(f"Enlace guardado: {link} (parent: {parent_url})") print(f"Enlace guardado: {link} (parent: {parent_url})")
except Exception as e: except Exception as e:
print(f"Error al guardar en la base de datos: {e}") print(f"Error al guardar en la base de datos: {e}")
def get_url_from_ui(self): def get_url_from_ui(self):
@ -140,6 +131,23 @@ class Scrapper:
except AttributeError: except AttributeError:
print("No se pudo obtener la URL desde la interfaz") print("No se pudo obtener la URL desde la interfaz")
return None return None
"""
def save_links_to_db(self, url, links):
Guarda los enlaces en la base de datos
try:
connection = mysql.connector.connect(**self.db_config)
cursor = connection.cursor()
cursor.execute("CREATE TABLE IF NOT EXISTS links (id INT AUTO_INCREMENT PRIMARY KEY, url TEXT, parent_url TEXT)")
for link in links:
print(f"Guardando enlace: {link} (parent: {url})") # Verifica los datos
cursor.execute("INSERT INTO links (url, parent_url) VALUES (%s, %s)", (link, url))
connection.commit()
cursor.close()
connection.close()
except Exception as e:
print(f"Error al gaurdar en la base de datos: {e}")
"""

View File

@ -8,8 +8,7 @@ class SystemMonitor:
def __init__(self, parent_frame): def __init__(self, parent_frame):
self.parent_frame = parent_frame self.parent_frame = parent_frame
self.max_data_points = 60 self.max_data_points = 60
self.running = False self.running = False
self.previous_net_io = psutil.net_io_counters()
self.metrics = { self.metrics = {
"CPU Usage": { "CPU Usage": {
"data": [], "data": [],
@ -21,9 +20,9 @@ class SystemMonitor:
"fetch_func": lambda: psutil.virtual_memory().percent, "fetch_func": lambda: psutil.virtual_memory().percent,
"interval": 1 "interval": 1
}, },
"Network Usage (KB/s)": { "Processes": {
"data": [], "data": [],
"fetch_func": self.get_network_usage, "fetch_func": self.get_process_count,
"interval": 1 "interval": 1
} }
} }
@ -77,20 +76,11 @@ class SystemMonitor:
def redraw(): def redraw():
graph["line"].set_data(x, data) graph["line"].set_data(x, data)
graph["axis"].set_xlim(0, len(data)) graph["axis"].set_xlim(0, len(data))
graph["axis"].set_ylim(0, max(data) * 1.2 if data else 100)
graph["figure"].canvas.draw() graph["figure"].canvas.draw()
self.parent_frame.after(0, redraw) self.parent_frame.after(0, redraw)
def get_network_usage(self): def get_process_count(self):
"""Calcula la velocidad de transferencia de red en KB/s.""" """Obtiene el número de procesos actuales."""
current_net_io = psutil.net_io_counters() return len(psutil.pids())
sent_bytes = current_net_io.bytes_sent - self.previous_net_io.bytes_sent
recv_bytes = current_net_io.bytes_recv - self.previous_net_io.bytes_recv
self.previous_net_io = current_net_io # Actualiza los datos previos
# Convierte a KB/s
total_kb = (sent_bytes + recv_bytes) / 1024
#print(f"Network Usage: {total_kb} KB/s")
return total_kb

View File

@ -38,7 +38,7 @@ class ThreadsManager:
self.tasks["time"].start(self.update_time) self.tasks["time"].start(self.update_time)
self.tasks["temperature"].start(self.update_temperature) self.tasks["temperature"].start(self.update_temperature)
self.tasks["emails"].start(self.update_emails) self.tasks["emails"].start(self.update_emails)
self.tasks["scrapper"].start(self.scrapper.start_scraping)
if self.system_monitor: if self.system_monitor:
for metric in self.system_monitor.metrics.keys(): for metric in self.system_monitor.metrics.keys():