[PyQt] PyQt5.6: issues with QBAsicTimer when running a script using PyQtWebKitWidgets
Dave Sampson
samper.d at gmail.com
Mon Jan 8 12:49:51 GMT 2018
Barry,
Thanks for the reply and assistance in solving this issue.
As you suggested, I tried to remove all dependencies for Anaconda and
Spyder, and started to hit some more brick walls with versionitis of PyQt5.
So here we go.
* I created a virtualenv right on my Ubuntu system
* When I installed various packages using PIP I could not install
PyQt5.6 like what I had in anaconda, I had to choose at minimum 5.7
so dove all in with 5.9. And then the fun really started.
* PyQt 5.6 to 5.9 conversion led to broken imports. I managed to fix
those and handle cases for differences between 5.6 and 5.9 using
try/except pairs (see new BROKEN script below)
* QWebPage changed locations and namespace (some docs here:
http://doc.qt.io/qt-5/qtwebenginewidgets-qtwebkitportingguide.html)
* Then of course the PyQt C library changed so I had to find new
import modules and new sub-modules.
* Then PyQt 5.9 introduces some asynchronous functions that expect
callback functions. I really don't understand callback functions,
but I tried creating one anyhow.
* The new "pip list" for the script below includes the following
o """beautifulsoup4==4.6.0
bs4==0.0.1
certifi==2017.11.5
chardet==3.0.4
idna==2.6
pip==9.0.1
pkg-resources==0.0.0
PyQt5==5.9.2
QtPy==1.3.1
requests==2.18.4
setuptools==38.2.5
sip==4.19.6
urllib3==1.22
wheel==0.30.0"""
revised script with new issues:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#
# Copyright (c) Tue Dec 19 21:00:56 2017, David Sampson
# (samper.d at gmail.com)
#
# license: GNU LGPL
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
"""
Created on Tue Dec 19 21:00:56 2017
@author: sampson
"""
# Imports
import sys
import requests
#from config import *
from bs4 import BeautifulSoup
from PyQt5.QtCore import QThread, QBasicTimer, QCoreApplication
# Pick whichever Qt web backend this PyQt5 installation provides and record
# the choice in ``pyqt`` so later code can branch on the API differences.
try:
    # QtWebKit bindings: the backend used in the PyQt 5.6 environment.
    from PyQt5.QtWebKitWidgets import QWebPage
    pyqt = "5.6"
except ImportError:
    # Only a missing QtWebKit module should trigger the fallback; any other
    # error is a real problem and must not be hidden by a bare ``except``.
    # QWebEnginePage is aliased to QWebPage so the rest of the script can
    # keep using a single name.
    from PyQt5.QtWebEngineWidgets import QWebEnginePage as QWebPage
    pyqt = "5.9"
from PyQt5.QtWidgets import QApplication
# Variables
# Earlier test targets, kept for reference:
#url = 'http://webscraping.com'
#url='http://www.amazon.com'
#TickerList = "AEU.UN, BMO, BNS"
# NOTE(review): Ticker and TickerList are not referenced anywhere else in
# this script -- presumably used by the larger project; confirm before removing.
Ticker = "BNS"
TickerList = "AEU.UN, BMO"
# Address that main() passes to get_page().
# NOTE(review): looks like a placeholder to be replaced with a real
# JavaScript-driven site -- confirm.
url="http://www.JsEnabledSite.com"
# Constants
# Main function
class Render(QWebPage):
    """Load an HTML string into a Qt web page and capture the resulting markup.

    Constructing an instance does all the work: it obtains (or creates) the
    application object, loads ``html`` into the page, and enters the
    application's event loop.  When loading finishes, the page's markup is
    stored in ``self.html`` and the application is asked to quit.

    Works with both backends selected at import time (module-level ``pyqt``):

    * ``"5.6"`` -- QtWebKit: markup is read synchronously from ``mainFrame()``.
    * ``"5.9"`` -- QtWebEngine: ``toHtml()`` is asynchronous and delivers the
      markup to a callback, so the application is only quit from that callback.

    Attributes:
        html: The captured markup, or ``None`` if nothing was captured.
        app: The application instance whose event loop is run.
        timer: A ``QBasicTimer`` started for 5000 ms with this page as target.
    """

    def __init__(self, html):
        """Load ``html`` and run the event loop until the markup is captured.

        Args:
            html: Markup to load into the page.
        """
        self.html = None
        # Reuse an application object if one already exists (e.g. when run
        # from inside a Qt-based IDE); otherwise create one.
        self.app = QCoreApplication.instance()
        if self.app is None:
            self.app = QApplication(sys.argv)
            print("Creating new QApplication instance")
        else:
            print("using Existing instance of QApplication: %s" %
                  str(self.app))
        QWebPage.__init__(self)
        # NOTE(review): no timerEvent() is defined on this class, so the
        # timer appears to serve only as a diagnostic -- confirm it is needed.
        self.timer = QBasicTimer()
        self.timer.start(5000, self)
        self._report_timer()
        self.loadStarted.connect(self._loadStarted)
        self.loadProgress.connect(self._loadProgress)
        self.loadFinished.connect(self._loadFinished)
        # QtWebKit pages load content through their main frame; QtWebEngine
        # pages have no mainFrame() and take the markup directly.  An explicit
        # check replaces the former bare ``except``, which hid real errors.
        if hasattr(self, "mainFrame"):
            self.mainFrame().setHtml(html)
        else:
            self.setHtml(html)
        self.app.exec()

    def _loadFinished(self, result):
        """Slot for ``loadFinished``: capture the markup, then shut down.

        Args:
            result: Value emitted by the ``loadFinished`` signal (not used).
        """
        if pyqt == "5.9":
            # toHtml() on the WebEngine page is asynchronous: it takes a
            # callback and returns immediately.  Shutting down here would
            # quit the event loop before the markup arrives, so the rest of
            # the work happens in _html_ready().
            print("fetching HTML")
            self.toHtml(self._html_ready)
            return
        if pyqt == "5.6":
            self.html = self.mainFrame().toHtml()
            print("Finihsed function passed")
        else:
            print("you are running pyqt version: unknown")
        self._finish()

    def _html_ready(self, markup):
        """Callback for the asynchronous ``toHtml()``: store markup and finish."""
        self.html = markup
        print("HTML Fetched")
        self._finish()

    def _finish(self):
        """Stop the timer, report its state, and ask the application to quit."""
        print("Load Finished")
        self.timer.stop()
        self._report_timer()
        QApplication.instance().quit()

    def _report_timer(self):
        """Print whether ``self.timer`` is active (and its id when it is)."""
        if self.timer.isActive():
            print("timer is active")
            print("timer ID: %s" % str(self.timer.timerId()))
        else:
            print("timer is inactive")

    def _loadStarted(self):
        """Slot for ``loadStarted``: announce that loading has begun."""
        print("Page is loading content")

    def _loadProgress(self, progress):
        """Slot for ``loadProgress``: print the progress value emitted."""
        print("Page is loading: %s" % progress)

    def get_html(self, result):
        """Debug helper: print the stored markup and ``result``; return ``result``."""
        print(self.html)
        print(result)
        return result
def main():
    """Script entry point: announce the configured URL, then fetch and render it."""
    target = url
    print("url: %s" % target)
    get_page(target)
def get_page(Url):
    """Download ``Url`` and return its markup after passing it through ``Render``.

    The raw page text is fetched with ``requests`` and handed to ``Render``,
    whose ``html`` attribute is printed and returned.

    Args:
        Url: Address of the page to fetch.

    Returns:
        The value of ``Render(...).html`` for the downloaded text.
    """
    # Step 1: plain HTTP fetch of the page source.
    response = requests.get(Url)
    raw_markup = response.text

    # Step 2: render.  No QApplication is set up here: Render looks up or
    # creates the application object and runs its event loop itself.
    rendered_markup = Render(raw_markup).html

    print("Finished")
    print(rendered_markup)
    return rendered_markup
# Run the scrape only when this file is executed as a script, not on import.
# (get_page(url) may be called directly here instead of main().)
if __name__ == "__main__":
    main()
On 01/01/2018 03:32 AM, Barry wrote:
> What happens if you run your script outside of spyder from the bash
> prompt?
> Maybe it's Spyder that is breaking things because it's in control, not
> your code.
>
> Barry
>
> On 1 Jan 2018, at 04:02, Dave Sampson <samper.d at gmail.com
> <mailto:samper.d at gmail.com>> wrote:
>
>> Hey Folks,
>>
>> A special thanks to anyone who takes time to read about my current
>> situation and provide part of or all of the solution.
>>
>> The Issue:
>>
>> ==========
>>
>> As part of a larger project, I am wanting to develop a function,
>> class or module using python that will accept a URL of a javascript
>> enabled website and return the page's HTML contents generated by
>> those JS scripts. Please see the end of this post for a partially
>> functioning script.
>>
>> I used this reference as my initial source that got me using the PyQt
>> bindings.
>> https://impythonist.wordpress.com/2015/01/06/ultimate-guide-for-scraping-javascript-rendered-web-pages/
>>
>> Much of the documentation refers to PyQt4, I am using PyQt5.6, which
>> is sometimes not in line with more recent PyQt5 documentation. So
>> there has been lots of trial and error along the way.
>>
>> I am able to run the below script once, however upon a second run of
>> the script I keep running up against this error: "QBasicTimer::start:
>> Stopping previous timer failed. Possibly trying to stop from a
>> different thread".
>>
>> I will need to scrape at minimum 3 separate URL's each time this
>> module/class is called. I assume that once I get the solution to make
>> two successful runs then running 10's or 100's should also work.
>>
>> This module will be called from another control module. I have little
>> interest in persistent objects like QApplication, between the
>> different calls.
>>
>> At this point I am getting a bit frustrated that I can't just send a
>> function a URL and receive back HTML in rapid succession. If JS was
>> not involved I would not have this issue. Using requests, urllib and
>> BeautifulSoup for walking through the HTMLDOC are all working fine.
>>
>> Please note I am not a C programmer so being able to abstract
>> concepts from C documentation to python is sometimes a challenge. I
>> would consider myself an intermediate python scripter who learned on
>> 2.7 and now consistently use 3.6 for my projects. Pardon if some
>> syntax smells of 2.x. I try mostly to follow PEP8 once I have solid
>> code in place.
>>
>> Also note that I am not a classically trained developer, I am
>> geographer who designs Geographic Information Systems (GIS), so the
>> concept of classes are still a bit abstract. I understand analogies
>> of classes for describing cars, robots and a pizza making process,
>> but leveraging PyQT for web scraping and creating GUI objects that
>> will never render anything visually is a bit confusing. Maybe someday
>> urllib will process JS. Generally I use python for automation of data
>> management tasks and processes in the geomatics domain. So please
>> assume I need some explanation if your response is "remember when
>> using classes do this and that", I likely never knew what you will
>> refer to. Assume I know nothing about classes and the lineage of
>> inheritance of PyQt objects. (smile)
>>
>> The environment:
>>
>> ============
>>
>> * Ubuntu 16.04 LTS
>>
>> * Anaconda navigator 1.6.11
>>
>> * Python 3 dedicated environment
>>
>> * Python 3.6.3 (64 bit)
>>
>> * Spyder 3.2.5
>>
>> * PyQt 5.6
>>
>> * Qt 5.6.2
>>
>>
>> Sources:
>>
>> =======
>>
>> These are some of the sources I have used to try and solve this
>> problem. I did not capture all of the sources used as many other
>> resources point back to these sources:
>>
>> *
>> https://stackoverflow.com/questions/6180293/pyqt-timers-cannot-be-started-from-another-thread
>>
>> *
>> https://forum.qt.io/topic/13459/timers-cannot-be-stopped-from-another-thread-but-how-do-i-stop-start-timer-in-thread
>>
>> * https://github.com/spyder-ide/spyder/issues/974
>>
>> 6. [Web Scraping Primer using Webkit
>> (https://impythonist.wordpress.com/2015/01/06/ultimate-guide-for-scraping-javascript-rendered-web-pages/)]
>> 7. [How to run PyQt applications within Spyder
>> (https://github.com/spyder-ide/spyder/wiki/How-to-run-PyQt-applications-within-Spyder)]
>> 8. [PyQT code can not run twice when using Spyder IDE
>> (https://github.com/spyder-ide/spyder/issues/2970)]
>>
>>
>> Problem solving approaches:
>>
>> ==================
>>
>> 1. The first issue I came across is related to running PyQt objects
>> like QApplication within the Spyder IDE since there is already a
>> QApplication object created by Spyder. So this problem led to a
>> few different approaches.
>> 1. Moved app = QApplication() outside of the __init__() function
>> of the class
>> 2. Added a conditional check for QApplication.instance() to
>> either reuse the existing object or create one if missing.
>> This depends on if you run the code inside or outside of a QT
>> environment. For example running at the command line should
>> create a new instance, where running in Spyder reuses the
>> existing instance
>> 3. Created the App object in another module that imports this
>> one. The same result is achieved since the namespace of the
>> imported module perpetuates the same namespace.
>> 4. I have even tried overriding various names in the namespace
>> without success. I understand this is dangerous, but wanted
>> to try and isolate what object is creating the timer.
>> 5. Resetting the whole namespace loses some valuable settings
>> found in "from config import *". For the purpose of the
>> script below that line can remain commented
>> 6. Moving the creation of the App object outside the class
>> caused some other issues when the page was loaded and the
>> app.quit() function is called. So I tried grabbing the app
>> object created outside the class, but I could never grab onto
>> it to close it, so it would hang. I wondered if I could then
>> call a function external to the class
>> 2. So after trying to attack the persistent QApplication Object I
>> went after trying to solve the QBasicTimer issue.
>> 1. I tried killing the timer with app.killTimer() before the
>> app.quit() is called once the page is loaded. However, no
>> timer was found. this suggests that some other object was
>> creating a timer. I could not track that down.
>> 2. Although the self.app object created by QApplication had a
>> app.killTimer() function there were no start() or stop()
>> timer methods, and I could not find out what value was left
>> on the timer. So I tried creating a timer object within the
>> class for the app object. This allowed me to start and stop
>> timers (set to 5 seconds), however even after verifying that
>> the timer was created, started, killed and then no longer
>> existed, I still get the QBasicTimer error.
>> 3. In some cases, if I killed the timer too early the program
>> would hang. Killing the timer after the app.quit() function
>> rendered no timer. So did I create a timer that still ran
>> after the app died? Or is the timer somewhere else?
>> 3. The next area I started to explore was the concept of creating
>> threads with QThreads. The sources I read were giving this
>> concept high hopes. I soon was reminded I am horrible at reading
>> C documentation and quickly gave up. Besides, even creating
>> threads may bring those persistent objects into my executed
>> namespace.
>> 4. External processes was my next approach.
>> 1. Could I use something like "from subprocess import call" and
>> then I could just call(["python", "this-script.py",
>> "http://this.url.com"]) as a subprocess. Thinking that once
>> the subprocess would run then the process would die along
>> with any timers. I could also create the QApplication object
>> in a jail and not pollute my namespace. I could not even get
>> simple linux commands like "ls" to return anything
>> meaningful, let alone a "return" value from the script.
>> result = fail.
>> 5. Why use Anaconda/spyder at all? I came across these types of
>> responses in various posts to similar problems. It is a fair
>> enough question, so let me try to address some points to consider.
>> 1. I work in government and think it is high time that any
>> public servant who has an idea to automate something in their
>> workflow should have easy tools to help. Anaconda could be
>> this tool.
>> 2. If the final solution is to strip out everything except for a
>> text editor (vim) and command line (BASH) then I will take
>> it. At the end of the day I need something that works. I tend
>> to work in these types of stripped down environments through
>> SSH on servers anyways. But perhaps there is an alternative
>> solution.
>> 3. I have been using Python and BASH for years and like the
>> lightweight approach, however it is not for the faint of
>> heart. I just recently started using Anaconda as a general
>> data wrangling platform and I like its elegance. I have been
>> promoting it with other non developers and govies (aka public
>> servants) playing in the data space. I feel the platform is
>> and could be a great equalizer. I would feel pretty silly
>> recommending this platform if it could not support simple
>> web scraping. result = fail!
>> 4. I also just learned about Spyder, through using Anaconda.
>> Spyder provides me what I need and emulates my lightweight
>> setup of a text editor, Command Line and File browser setup.
>> Also I like the debug tools, being able to see all the
>> variables in the namespace, create stop points and control
>> step throughs. All of these things I never had at the command
>> line. and Ipython is quite intriguing. I think I am just at
>> the start of another journey towards efficiency.
>> 5. I am a big believer that tech tools should not influence or
>> impede workflows. In theory this should all work in anaconda
>> 6. I find setting up individual virtual environments tedious for
>> the various python projects I have on the go.
>>
>> That is a general overview of my battles with PyQt for web scraping
>> over the past couple of weeks. So now I turn to the mailing list of
>> creative, professional and motivated PyQt users to let me know what
>> obvious solution I am missing.
>>
>> So now I present to you fine and dedicated list viewers my script for
>> consideration. I look forward to learning what solution(s) come forward.
>>
>> I am prepared to be humbled.
>>
>>
>> The Script:
>>
>> ========
>>
>> This module should be able to run once in either the Spyder IPython
>> Console or an Anaconda terminal window. When you run it a second time
>> you should get the error. I have left many other code snippets intact
>> using comments to let you know some of the other approaches I have taken.
>>
>> #!/usr/bin/env python3
>> # -*- coding: utf-8 -*-
>> #
>> # Copyright (c) Tue Dec 19 21:00:56 2017, David Sampson
>> (samper.d at gmail.com)
>> #
>> # license: GNU LGPL
>> #
>> # This library is free software; you can redistribute it and/or
>> # modify it under the terms of the GNU Lesser General Public
>> # License as published by the Free Software Foundation; either
>> # version 2.1 of the License, or (at your option) any later version.
>>
>>
>> """
>> Created on Tue Dec 19 21:00:56 2017
>>
>> @author: sampson
>> """
>>
>> # Imports
>> import sys
>> import requests
>> #from config import *
>> from PyQt5.QtCore import QThread, QBasicTimer, QCoreApplication
>> from PyQt5.QtWebKitWidgets import QWebPage
>> from PyQt5.QtWidgets import QApplication
>>
>> # Variables
>> #url = 'http://webscraping.com'
>> #url='http://www.amazon.com'
>>
>> ### EDIT THIS###
>> url="http://www.JsEnabledSite.com"
>>
>> # Constants
>>
>>
>> # Main function
>> def main():
>> get_page(url)
>>
>>
>>
>> class Render(QWebPage):
>> """Render HTML with PyQt5 WebKit."""
>> def __init__(self, html):
>> self.html = None
>> self.app = QCoreApplication.instance()
>> if self.app is None:
>> self.app = QApplication(sys.argv)
>> print("Creating new QApplication instance")
>> else:
>> print("using Existing instance of QApplication: %s" %
>> str(self.app))
>> QWebPage.__init__(self)
>> self.timer = QBasicTimer()
>> self.timer.start(5000, self)
>> if self.timer.isActive() == True:
>> print("timer is active")
>> print("timer ID: %s" % str(self.timer.timerId()))
>> else:
>> print("timer is inactive")
>> self.loadFinished.connect(self._loadFinished)
>> self.mainFrame().setHtml(html)
>> self.app.exec()
>>
>>
>> def _loadFinished(self, result):
>> self.html = self.mainFrame().toHtml()
>> print("Load Finished")
>> self.timer.stop()
>> if self.timer.isActive() == True:
>> print("timer is active")
>> print("timer ID: %s" % str(self.timer.timerId()))
>> else:
>> print("timer is inactive")
>> QApplication.instance().quit()
>> #self.app.quit()
>>
>>
>>
>>
>> def get_page(Url):
>> """
>> This module accepts a URL and returns a page with rendered
>> JavaScript
>> using PyQt5 Webkit.Stock
>> """
>> #global app
>> # get the raw HTML
>> SourceHtml = requests.get(Url).text
>>
>> #app = QApplication(sys.argv)
>> #app = QApplication.instance()
>> #if app is None:
>> # app = QApplication(sys.argv)
>> #else:
>> # print("using Existing instance of QApplication: %s" % str(app))
>> RenderedHtml = Render(SourceHtml).html
>> #app.exec_()
>>
>> #sys.exit(app.exec())
>>
>> #print(RenderedHtml)
>> print("Finished")
>> return RenderedHtml
>>
>>
>> if __name__ == '__main__':
>> main()
>> #get_page(url)
>>
>>
>> _______________________________________________
>> PyQt mailing list PyQt at riverbankcomputing.com
>> <mailto:PyQt at riverbankcomputing.com>
>> https://www.riverbankcomputing.com/mailman/listinfo/pyqt
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://www.riverbankcomputing.com/pipermail/pyqt/attachments/20180108/df881d21/attachment-0001.html>
More information about the PyQt
mailing list