web_browsers:chrome_screen_scraper:chrome_screen_scraper
Differences
This shows you the differences between two versions of the page.
Both sides previous revisionPrevious revisionNext revision | Previous revision | ||
web_browsers:chrome_screen_scraper:chrome_screen_scraper [2016/07/11 09:56] – peter | web_browsers:chrome_screen_scraper:chrome_screen_scraper [2020/04/15 11:46] (current) – removed peter | ||
---|---|---|---|
Line 1: | Line 1: | ||
- | ====== Web Browsers - Chrome Screen Scraper ====== | ||
- | Composed of two parts. | ||
- | |||
- | * Parser | ||
- | * SaverApp | ||
- | |||
- | |||
- | ===== Load the two extension files into Chrome ===== | ||
- | |||
- | To load these into Chrome, open the settings menu, choose “More tools” and then “Extensions”. | ||
- | |||
- | * Tick the “Developer Mode” box that you see; this will allow you to load unpacked extensions. | ||
- | * Click on the button “Load unpacked extension” and navigate to the directory where the Parser files exist. | ||
- | * Click on the button “Load unpacked extension” and navigate to the directory where the SaverApp files exist. | ||
- | |||
- | {{: | ||
- | |||
- | |||
- | Once this has been completed, you will see a new icon appear as shown below | ||
- | |||
- | {{: | ||
- | |||
- | This will be used to start the page parsing. | ||
- | |||
- | First, though, you will need to launch the App; this is done by clicking on the “Launch” link as shown below | ||
- | |||
- | {{: | ||
- | |||
- | |||
- | You need to configure the 2 parts so that they can communicate with each other, this is done by the IDs that are given to each of the APP/ | ||
- | |||
- | You should now see a new window appear | ||
- | |||
- | {{: | ||
- | |||
- | You can click in the “my Application ID” box and copy the ID from there (just drag the mouse over it whilst holding the left button down so that it is highlighted in blue, and then press CTRL+C to copy it). | ||
- | |||
- | You can now click the Speech mark button | ||
- | |||
- | {{: | ||
- | |||
- | To launch the Data Parser Extension | ||
- | |||
- | {{: | ||
- | ===== Parser ===== | ||
- | |||
- | ==== index.html ==== | ||
- | |||
- | <file html index.html> | ||
- | < | ||
- | < | ||
- | < | ||
- | <meta charset=" | ||
- | <link href=" | ||
- | </ | ||
- | |||
- | < | ||
- | < | ||
- | < | ||
- | <div> | ||
- | <label for=" | ||
- | <input id=" | ||
- | |||
- | <button id=" | ||
- | <button id=" | ||
- | </ | ||
- | <div id=" | ||
- | |||
- | <script src=" | ||
- | </ | ||
- | |||
- | </ | ||
- | </ | ||
- | |||
- | |||
- | ==== main.css ==== | ||
- | |||
- | <file css main.css> | ||
- | input[type=" | ||
- | width: 240px; | ||
- | } | ||
- | |||
- | #log { | ||
- | background-color: | ||
- | padding: 10px 20px; | ||
- | margin-top: 10px; | ||
- | height: 300px; | ||
- | border: 1px solid black; | ||
- | overflow-y: scroll; | ||
- | overflow-x: hidden; | ||
- | } | ||
- | </ | ||
- | |||
- | |||
- | ==== index.js ==== | ||
- | |||
- | <file javascript index.js> | ||
- | (function(context){ | ||
- | document.getElementById(" | ||
- | var remoteAppID =""; | ||
- | var action =""; | ||
- | var pageIndex=null; | ||
- | console.log(' | ||
- | var sendId; | ||
- | sendId=document.getElementById(" | ||
- | |||
- | chrome.storage.local.get(' | ||
- | console.log(result); | ||
- | remoteAppID=result.remoteappid; | ||
- | console.log(' | ||
- | sendId.value=remoteAppID; | ||
- | }); | ||
- | |||
- | var logField = document.getElementById(" | ||
- | var save=document.getElementById(" | ||
- | var process=document.getElementById(" | ||
- | save.addEventListener(' | ||
- | var remoteID=sendId.value; | ||
- | appendLog(" | ||
- | chrome.storage.local.set({' | ||
- | }); | ||
- | |||
- | process.addEventListener(' | ||
- | pageIndex=1; | ||
- | chrome.tabs.query( | ||
- | { currentWindow: | ||
- | function (tabArray) { | ||
- | var activeTabId= tabArray[0]; | ||
- | chrome.tabs.sendMessage(activeTabId.id, | ||
- | } | ||
- | ); | ||
- | }); | ||
- | |||
- | function doStuffWithDom(res) { | ||
- | var domContent=res.domContent; | ||
- | var pageIndex=res.pageIndex; | ||
- | |||
- | // | ||
- | |||
- | chrome.runtime.sendMessage( | ||
- | sendId.value, | ||
- | {myCustomMessage: | ||
- | function(response) { | ||
- | appendLog(" | ||
- | if (response.Success==" | ||
- | console.log(" | ||
- | appendLog(" | ||
- | }else{ | ||
- | chrome.tabs.query( | ||
- | { currentWindow: | ||
- | function (tabArray) { | ||
- | var activeTabId= tabArray[0]; | ||
- | chrome.tabs.sendMessage(activeTabId.id, | ||
- | } | ||
- | ); | ||
- | console.log(response.result); | ||
- | appendLog(response.result); | ||
- | } | ||
- | }) | ||
- | } | ||
- | |||
- | chrome.runtime.onMessageExternal.addListener( | ||
- | function(request, | ||
- | appendLog(" | ||
- | if (request.result==" | ||
- | console.log(" | ||
- | appendLog(" | ||
- | }else{ | ||
- | chrome.tabs.query( | ||
- | { currentWindow: | ||
- | function (tabArray) { | ||
- | var activeTabId= tabArray[0]; | ||
- | chrome.tabs.sendMessage(activeTabId.id, | ||
- | } | ||
- | ); | ||
- | console.log(request.result); | ||
- | appendLog(request.result); | ||
- | } | ||
- | } | ||
- | ); | ||
- | |||
- | function dealWithNextButton(res){ | ||
- | var result = res.success; | ||
- | if (result==false){ | ||
- | appendLog(" | ||
- | pageIndex=null; | ||
- | }else{ | ||
- | appendLog(" | ||
- | } | ||
- | } | ||
- | |||
- | chrome.webNavigation.onDOMContentLoaded.addListener(function (details) { | ||
- | if (pageIndex !=null){ | ||
- | pageIndex+=1; | ||
- | chrome.tabs.query( | ||
- | { currentWindow: | ||
- | function (tabArray) { | ||
- | var activeTabId= tabArray[0]; | ||
- | chrome.tabs.sendMessage(activeTabId.id, | ||
- | } | ||
- | ); | ||
- | } | ||
- | } | ||
- | ); | ||
- | |||
- | function doInCurrentTab(tabCallback) { | ||
- | } | ||
- | |||
- | |||
- | var appendLog = function(message) { | ||
- | logField.innerText+=" | ||
- | }; | ||
- | |||
- | context.appendLog = appendLog; | ||
- | })(window) | ||
- | </ | ||
- | |||
- | |||
- | ==== content.js ==== | ||
- | |||
- | <file javascript content.js> | ||
- | // Listen for messages | ||
- | chrome.runtime.onMessage.addListener(function (msg, sender, sendResponse) { | ||
- | console.log(' | ||
- | // If the received message has the expected format... | ||
- | if (msg.text === ' | ||
- | // Call the specified callback, passing | ||
- | // the web-page' | ||
- | var tbl = document.getElementsByName(' | ||
- | // | ||
- | |||
- | |||
- | var rows = []; | ||
- | var result = []; | ||
- | $(' | ||
- | var row = {' | ||
- | if ($(this).find(' | ||
- | $(this).find(' | ||
- | row[index] = $(this).text(); | ||
- | }); | ||
- | result.push(row); | ||
- | } | ||
- | }); | ||
- | | ||
- | var retResult = ({' | ||
- | sendResponse(retResult); | ||
- | } | ||
- | if (msg.text === ' | ||
- | var nextButton = $(' | ||
- | if (nextButton.length> | ||
- | nextButton.trigger( " | ||
- | var retResultS = ({' | ||
- | sendResponse(retResultS); | ||
- | }else{ | ||
- | var retResultF = ({' | ||
- | sendResponse(retResultF); | ||
- | } | ||
- | } | ||
- | }); | ||
- | </ | ||
- | |||
- | |||
- | ==== eventPage.js ==== | ||
- | |||
- | <file javascript eventPage.js> | ||
- | var blacklistedIds = [" | ||
- | |||
- | chrome.runtime.onMessageExternal.addListener( | ||
- | function(request, | ||
- | appendLog(" | ||
- | if (request.myResultAction==' | ||
- | appendLog(request.myResultAction+' | ||
- | sendResponse({" | ||
- | } else { | ||
- | sendResponse({" | ||
- | } | ||
- | } | ||
- | ); | ||
- | </ | ||
- | |||
- | |||
- | ==== manifest.json ==== | ||
- | |||
- | <file json manifest.json> | ||
- | { | ||
- | " | ||
- | " | ||
- | " | ||
- | " | ||
- | " | ||
- | " | ||
- | " | ||
- | }, | ||
- | " | ||
- | " | ||
- | " | ||
- | }, | ||
- | " | ||
- | " | ||
- | " | ||
- | " | ||
- | }], | ||
- | " | ||
- | " | ||
- | } | ||
- | </ | ||
- | |||
- | |||
- | ===== Save Sink Events ===== | ||
- | |||
- | ==== index.html ==== | ||
- | |||
- | <file html index.html> | ||
- | < | ||
- | |||
- | < | ||
- | < | ||
- | <meta charset=" | ||
- | <link href=" | ||
- | </ | ||
- | |||
- | < | ||
- | < | ||
- | < | ||
- | <div> | ||
- | <label for=" | ||
- | <input id=" | ||
- | <button id=" | ||
- | <button id=" | ||
- | </ | ||
- | |||
- | <div id=" | ||
- | |||
- | <script src=" | ||
- | </ | ||
- | </ | ||
- | </ | ||
- | |||
- | |||
- | ==== main.css ==== | ||
- | |||
- | <file css main.css> | ||
- | input[type=" | ||
- | width: 240px; | ||
- | } | ||
- | |||
- | #log { | ||
- | background-color: | ||
- | padding: 10px 20px; | ||
- | margin-top: 10px; | ||
- | height: 300px; | ||
- | border: 1px solid black; | ||
- | overflow-y: scroll; | ||
- | overflow-x: hidden; | ||
- | } | ||
- | </ | ||
- | |||
- | |||
- | ==== index.js ==== | ||
- | |||
- | <file javascript index.js> | ||
- | (function(context){ | ||
- | document.getElementById(" | ||
- | |||
- | var remoteAppID =""; | ||
- | |||
- | chrome.storage.local.get(' | ||
- | remoteAppID=result.remoteappid; | ||
- | sendId.value=remoteAppID; | ||
- | }); | ||
- | |||
- | var logField = document.getElementById(" | ||
- | var selectSave=document.getElementById(" | ||
- | var sendId=document.getElementById(" | ||
- | var clearLog=document.getElementById(" | ||
- | var saveId=document.getElementById(" | ||
- | |||
- | saveId.addEventListener(' | ||
- | var remoteID=sendId.value; | ||
- | appendLog(" | ||
- | chrome.storage.local.set({' | ||
- | //alert (remoteID); | ||
- | }); | ||
- | |||
- | var chosenFileEntry=null; | ||
- | |||
- | clearLog.addEventListener(' | ||
- | logField.innerText=''; | ||
- | }); | ||
- | |||
- | errorHandler = function (obj) { | ||
- | sendResponse({" | ||
- | console.log(obj); | ||
- | }; | ||
- | |||
- | selectSave.addEventListener(' | ||
- | chrome.fileSystem.chooseEntry({type: | ||
- | suggestedName: | ||
- | |||
- | function(writableFileEntry) { | ||
- | chosenFileEntry=writableFileEntry; | ||
- | |||
- | writableFileEntry.createWriter(function(writer) { | ||
- | writer.seek(0); | ||
- | writer.truncate(0); | ||
- | writer.onwriteend = function(e) { | ||
- | }; | ||
- | |||
- | writer.write(new Blob([' | ||
- | {type: ' | ||
- | )); | ||
- | }, errorHandler); | ||
- | }); | ||
- | }); | ||
- | |||
- | |||
- | chrome.runtime.onMessageExternal.addListener( | ||
- | function(request, | ||
- | if (request.myAction) { | ||
- | appendLog(" | ||
- | // | ||
- | |||
- | if (request.myAction==' | ||
- | sendResponse({" | ||
- | } | ||
- | |||
- | if (request.myAction==' | ||
- | var arrData = typeof JSONData != ' | ||
- | var CSV = ''; | ||
- | |||
- | for (var i = 0; i < arrData.length; | ||
- | var row = ""; | ||
- | |||
- | for (var index in arrData[i]) { | ||
- | //row += '"' | ||
- | row += arrData[i][index] + ','; | ||
- | } | ||
- | |||
- | row.slice(0, | ||
- | CSV += row + ' | ||
- | } | ||
- | |||
- | appendLog(" | ||
- | |||
- | chosenFileEntry.file(function(file) { | ||
- | var reader = new FileReader(); | ||
- | |||
- | reader.onload = function(e) { | ||
- | var contents =e.target.result; | ||
- | |||
- | if (request.myIndex==1){ | ||
- | contents=''; | ||
- | appendLog(" | ||
- | } | ||
- | |||
- | appendLog(" | ||
- | CSV=contents+CSV; | ||
- | |||
- | chrome.fileSystem.getWritableEntry(chosenFileEntry, | ||
- | function(writableFileEntry) { | ||
- | writableFileEntry.createWriter(function(writer) { | ||
- | writer.onwriteend = function(e) { | ||
- | appendLog(" | ||
- | sendReply(" | ||
- | }; | ||
- | |||
- | chosenFileEntry.file(function(file) { | ||
- | writer.write(new Blob([CSV], | ||
- | {type: ' | ||
- | }); | ||
- | }, errorHandler); | ||
- | }); | ||
- | }; | ||
- | |||
- | reader.readAsText(file); | ||
- | }); | ||
- | } | ||
- | } else { | ||
- | sendResponse({" | ||
- | } | ||
- | }); | ||
- | |||
- | function sendReply(myResult, | ||
- | chrome.runtime.sendMessage( | ||
- | sendId.value, | ||
- | {result: myResult, myResultAction : myResultAction, | ||
- | |||
- | function(response) { | ||
- | console.log(" | ||
- | appendLog(" | ||
- | } | ||
- | ) | ||
- | } | ||
- | |||
- | var appendLog = function(message) { | ||
- | logField.innerText+=" | ||
- | }; | ||
- | |||
- | |||
- | context.appendLog = appendLog; | ||
- | })(window) | ||
- | </ | ||
- | |||
- | |||
- | ==== manifest.json ==== | ||
- | |||
- | <file json manifest.json> | ||
- | { | ||
- | " | ||
- | " | ||
- | " | ||
- | " | ||
- | " | ||
- | " | ||
- | " | ||
- | }, | ||
- | " | ||
- | " | ||
- | " | ||
- | } | ||
- | }, | ||
- | " | ||
- | } | ||
- | </ |
web_browsers/chrome_screen_scraper/chrome_screen_scraper.1468231012.txt.gz · Last modified: 2020/07/15 09:30 (external edit)