web_browsers:chrome_screen_scraper:chrome_screen_scraper
This is an old revision of the document!
Table of Contents
Web Browsers - Chrome Screen Scraper
Composed of two parts.
- Parser
- SaverApp
Load the two extension files into Chrome
To load these into Chrome, open the settings menu, choose “More tools”, and then “Extensions”.
- Tick the “Developer Mode” box; this allows you to load unpacked extensions.
- Click on the button “Load unpacked extension” and navigate to the directory where the Parser files exist.
- Click on the button “Load unpacked extension” and navigate to the directory where the SaverApp files exist.
Parser
index.html
- index.html
<!DOCTYPE html>
<html>
<head>
    <meta charset="utf-8">
    <!-- Styles for the text inputs and the scrolling log panel. -->
    <link href="main.css" rel="stylesheet">
</head>
<body>
    <h3>Data Parser</h3>
    <!-- index.js fills this read-only field with chrome.runtime.id. -->
    <!-- <input> is a void element, so it takes no closing tag. -->
    <label>Parser ID is <input id="appid" type="text" readonly></label>
    <div>
        <!-- ID of the app that receives the scraped rows; index.js stores it under 'remoteappid'. -->
        <label for="sendId">Destination ID</label>
        <input id="sendId" type="text">
        <button id="saveID">Save ID</button>
        <button id="process">Start Process</button>
    </div>
    <!-- index.js appends its progress messages here. -->
    <div id="log"></div>
    <script src="index.js"></script>
</body>
</html>
main.css
- main.css
/* Text inputs are wide enough to show a full extension ID. */
input[type="text"] {
    width: 240px;
}

/* Fixed-height log panel: scrolls vertically, never horizontally. */
#log {
    height: 300px;
    margin-top: 10px;
    padding: 10px 20px;
    border: 1px solid black;
    background-color: rgb(226, 226, 250);
    overflow-x: hidden;
    overflow-y: scroll;
}
index.js
- index.js
/**
 * Popup script of the Parser extension.
 *
 * Flow, as implemented below:
 *   1. "Start Process" asks the content script in the active tab for the
 *      table rows ('report_back').
 *   2. The rows are forwarded to the app whose ID is in the "Destination ID"
 *      field with myAction "save".
 *   3. When the destination confirms (either in the direct reply or through a
 *      later external message), the content script is asked to press the
 *      page's next button ('process_next').
 *   4. When the next page has loaded, step 1 repeats with pageIndex + 1.
 *
 * @param {Window} context  object on which appendLog is published
 */
(function (context) {
    document.getElementById("appid").value = chrome.runtime.id;

    // null while idle; otherwise the 1-based index of the page being scraped.
    var pageIndex = null;

    var sendId = document.getElementById("sendId");
    var logField = document.getElementById("log");
    var save = document.getElementById("saveID");
    var process = document.getElementById("process");

    console.log('Starting');

    /** Appends one line to the on-page log panel. */
    function appendLog(message) {
        logField.innerText += "\n" + message;
    }
    context.appendLog = appendLog;

    /** Text of chrome.runtime.lastError, or a generic fallback when it is not set. */
    function lastErrorMessage() {
        return chrome.runtime.lastError ? chrome.runtime.lastError.message : "no response";
    }

    /**
     * Sends `message` to the content script of the active tab in the current
     * window. Stops the run when there is no such tab.
     */
    function sendToActiveTab(message, callback) {
        chrome.tabs.query({ currentWindow: true, active: true }, function (tabArray) {
            var activeTab = tabArray && tabArray[0];
            if (!activeTab) {
                appendLog("No active tab found.");
                pageIndex = null;
                return;
            }
            chrome.tabs.sendMessage(activeTab.id, message, callback);
        });
    }

    /** Asks the page for its table rows, tagged with the current page index. */
    function requestPageData() {
        sendToActiveTab({text: 'report_back', pageIndex: pageIndex}, doStuffWithDom);
    }

    /** Asks the page to press its next button. */
    function requestNextPage() {
        sendToActiveTab({text: 'process_next'}, dealWithNextButton);
    }

    // Restore the previously saved destination ID. Nothing stored yet must
    // leave the field empty rather than showing the text "undefined".
    chrome.storage.local.get('remoteappid', function (result) {
        console.log(result);
        sendId.value = (result && result.remoteappid) || "";
    });

    save.addEventListener('click', function () {
        chrome.storage.local.set({'remoteappid': sendId.value});
        appendLog("Saved");
    });

    process.addEventListener('click', function () {
        pageIndex = 1;
        requestPageData();
    });

    /**
     * Receives the content script's answer to 'report_back' and forwards the
     * rows to the destination app.
     *
     * @param {{domContent: Array, pageIndex: number}|undefined} res
     *        undefined when no content script answered
     */
    function doStuffWithDom(res) {
        if (!res) {
            appendLog("No response from page : " + lastErrorMessage());
            pageIndex = null;
            return;
        }
        chrome.runtime.sendMessage(
            sendId.value,
            {myCustomMessage: JSON.stringify(res.domContent), myAction: "save", myIndex: res.pageIndex},
            function (response) {
                if (!response) {
                    // No direct reply: the callback runs without arguments and
                    // chrome.runtime.lastError is set. A destination that
                    // confirms asynchronously is handled by the
                    // onMessageExternal listener below, so only report here.
                    appendLog("Waiting for save confirmation (" + lastErrorMessage() + ")");
                    return;
                }
                appendLog("Remote Message : " + response.result);
                if (response.Success == "false") {
                    console.log("Remote Error : " + response.result);
                    appendLog("Remote Error : " + response.result);
                    pageIndex = null; // do not keep scraping after a failed save
                    return;
                }
                requestNextPage();
                console.log(response.result);
                appendLog(response.result);
            }
        );
    }

    // Asynchronous confirmation sent back by the destination app.
    chrome.runtime.onMessageExternal.addListener(function (request, sender, sendResponse) {
        if (!request) {
            return;
        }
        appendLog("Remote Message : " + request.myResultAction);
        if (request.result == "false") {
            console.log("Remote Error : " + request.myResultAction);
            appendLog("Remote Error : " + request.myResultAction);
            pageIndex = null;
            return;
        }
        requestNextPage();
        console.log(request.result);
        appendLog(request.result);
    });

    /**
     * Handles the content script's answer to 'process_next'.
     * A missing answer or success === false ends the run.
     */
    function dealWithNextButton(res) {
        if (!res || res.success === false) {
            appendLog("Finished.");
            pageIndex = null;
        } else {
            appendLog("Still Processing.");
        }
    }

    // While a run is active, every main-frame page load is treated as the next
    // page. The event also fires for sub-frames; those must not advance the
    // page counter.
    chrome.webNavigation.onDOMContentLoaded.addListener(function (details) {
        if (pageIndex === null || details.frameId !== 0) {
            return;
        }
        pageIndex += 1;
        requestPageData();
    });
})(window);
content.js
- content.js
/**
 * Content script of the Parser extension.
 *
 * Answers two messages from the popup:
 *   - 'report_back'  : replies with the cell text of every data row of the
 *                      table named "table1", echoing msg.pageIndex.
 *   - 'process_next' : clicks the element named "nextButton" when present and
 *                      replies {success: true}; otherwise {success: false}.
 *
 * Relies on jQuery ($) being loaded into the same content-script context.
 */
chrome.runtime.onMessage.addListener(function (msg, sender, sendResponse) {
    if (msg.text === 'report_back') {
        var result = [];
        // A page without the table yields an empty selection, so the reply is
        // still sent (with no rows) instead of throwing before sendResponse.
        $('table[name=table1]>tbody>tr').each(function (id) {
            var cells = $(this).find('td');
            if (cells.length === 0) {
                return; // rows without <td> cells (e.g. header rows) are skipped
            }
            var row = {'id': id + 1};
            cells.each(function (index) {
                row[index] = $(this).text();
            });
            result.push(row);
        });
        sendResponse({'domContent': result, 'tableContent': result, 'pageIndex': msg.pageIndex});
        return;
    }

    if (msg.text === 'process_next') {
        var nextButton = $('[name="nextButton"]');
        var hasNext = nextButton.length > 0;
        if (hasNext) {
            nextButton.trigger("click");
        }
        sendResponse({'success': hasNext});
    }
});
eventPage.js
- eventPage.js
// Not consulted by the listener below.
var blacklistedIds = ["none"];

/**
 * Logging helper for the event page. The popup's appendLog lives on the
 * popup's own window and is not available here, so messages go to the
 * event page's console instead.
 */
function appendLog(message) {
    console.log(message);
}

/**
 * Acknowledges confirmations sent by another extension/app.
 * Replies "Ok, got your message" when myResultAction is 'Ok Saved :',
 * otherwise replies that the message was not understood.
 */
chrome.runtime.onMessageExternal.addListener(function (request, sender, sendResponse) {
    var action = request ? request.myResultAction : undefined;
    var index = request ? request.myResultIndex : undefined;

    appendLog("MSG RCV : " + action + ' ' + index);
    if (action == 'Ok Saved :') {
        appendLog(action + ' ' + index);
        sendResponse({"result": "Ok, got your message"});
    } else {
        sendResponse({"result": "Ops, I don't understand this message :" + action});
    }
});
manifest.json
- manifest.json
{ "name": "AG Data Parser Extension", "version": "1.1", "description": "Extension to parse pages and send them to the save sink.", "browser_action": { "default_title": "Send message to other apps", "default_icon": "icon_16.png", "default_popup": "index.html" }, "background": { "scripts": ["eventPage.js"], "persistent": false }, "content_scripts": [{ "matches": ["<all_urls>"], "js": ["content.js","jquery.js"], "run_at": "document_end" }], "permissions": [ "activeTab", "notifications","storage","webNavigation"], "manifest_version": 2 }
Save Sink Events
index.html
- index.html
<!DOCTYPE html>
<html>
<head>
    <meta charset="utf-8">
    <link href="main.css" rel="stylesheet">
</head>
<body>
    <h3>Data Saver</h3>
    <!-- index.js fills this read-only field with chrome.runtime.id. -->
    <label>Saver ID is <input id="appid" type="text" readonly></label>
    <div>
        <!-- ID of the extension that receives the save confirmations. -->
        <label for="sendId">Destination ID</label>
        <input id="sendId" type="text">
        <!-- The ids below are the ones index.js looks up: saveId, selectSave, clearLog. -->
        <button id="saveId">Save ID</button>
        <button id="selectSave">Select Save File</button>
        <button id="clearLog">Clear Log</button>
    </div>
    <div id="log"></div>
    <script src="index.js"></script>
</body>
</html>
main.css
- main.css
/* Text inputs are wide enough to show a full extension ID. */
input[type="text"] {
    width: 240px;
}

/* Fixed-height log panel: scrolls vertically, never horizontally. */
#log {
    height: 300px;
    margin-top: 10px;
    padding: 10px 20px;
    border: 1px solid black;
    background-color: rgb(226, 226, 250);
    overflow-x: hidden;
    overflow-y: scroll;
}
index.js
- index.js
/**
 * Window script of the Saver app.
 *
 * Receives external messages with myAction "save", converts the JSON rows in
 * myCustomMessage to CSV and stores them in the file chosen through the
 * "selectSave" button. Page index 1 starts the file afresh; later pages are
 * appended to what the file already contains. The outcome of each save is
 * reported to the extension whose ID is in the "sendId" field via sendReply().
 *
 * @param {Window} context  object on which appendLog and errorHandler are published
 */
(function (context) {
    document.getElementById("appid").value = chrome.runtime.id;

    var logField = document.getElementById("log");
    var sendId = document.getElementById("sendId");
    var selectSave = document.getElementById("selectSave");
    var clearLog = document.getElementById("clearLog");
    // The page listing spells this id "saveID"; accept either spelling.
    var saveId = document.getElementById("saveId") || document.getElementById("saveID");

    // FileEntry picked by the user; null until a save file has been chosen.
    var chosenFileEntry = null;

    /** Appends one line to the on-page log panel. */
    function appendLog(message) {
        logField.innerText += "\n" + message;
    }
    context.appendLog = appendLog;

    /** Generic failure callback for file operations started from the UI. */
    function errorHandler(obj) {
        console.log(obj);
        appendLog("Something Went Wrong.");
    }
    // Previously an implicit global; keep it reachable under the same name.
    context.errorHandler = errorHandler;

    /** Registers a click handler, tolerating a button that is absent from the page. */
    function onClick(element, id, handler) {
        if (!element) {
            console.log("Element not found : " + id);
            return;
        }
        element.addEventListener('click', handler);
    }

    /** Quotes a CSV field when it contains a comma, quote or line break. */
    function csvField(value) {
        var text = String(value);
        if (/[",\r\n]/.test(text)) {
            return '"' + text.replace(/"/g, '""') + '"';
        }
        return text;
    }

    /**
     * Converts an array of row objects to CSV text, one CRLF-terminated line
     * per row. Columns follow the property enumeration order of each row.
     */
    function toCsv(rows) {
        var csv = '';
        for (var i = 0; i < rows.length; i++) {
            var fields = [];
            for (var key in rows[i]) {
                fields.push(csvField(rows[i][key]));
            }
            csv += fields.join(',') + '\r\n';
        }
        return csv;
    }

    /** Reports the outcome of a save to the extension named in the sendId field. */
    function sendReply(myResult, myResultAction, pageIndex) {
        chrome.runtime.sendMessage(
            sendId.value,
            {result: myResult, myResultAction: myResultAction, myResultIndex: pageIndex},
            function (response) {
                if (chrome.runtime.lastError) {
                    console.log("reply error: " + chrome.runtime.lastError.message);
                }
                console.log("response: " + JSON.stringify(response));
                appendLog("response: " + JSON.stringify(response));
            }
        );
    }

    // Restore the previously saved destination ID. Nothing stored yet must
    // leave the field empty rather than showing the text "undefined".
    chrome.storage.local.get('remoteappid', function (result) {
        sendId.value = (result && result.remoteappid) || "";
    });

    onClick(saveId, "saveId", function () {
        chrome.storage.local.set({'remoteappid': sendId.value});
        appendLog("Saved");
    });

    onClick(clearLog, "clearLog", function () {
        logField.innerText = '';
    });

    onClick(selectSave, "selectSave", function () {
        chrome.fileSystem.chooseEntry({type: 'saveFile', suggestedName: 'output.csv'}, function (writableFileEntry) {
            if (!writableFileEntry) {
                // The callback receives no entry when the chooser is cancelled.
                appendLog("No Save File Selected");
                return;
            }
            chosenFileEntry = writableFileEntry;
            // Start from an empty file.
            writableFileEntry.createWriter(function (writer) {
                writer.onerror = errorHandler;
                writer.truncate(0);
            }, errorHandler);
        });
    });

    chrome.runtime.onMessageExternal.addListener(function (request, sender, sendResponse) {
        if (!request || !request.myAction) {
            sendResponse({"result": "Ops, I don't understand this message :" + JSON.stringify(request)});
            return;
        }

        appendLog("Action from " + sender.id + ": " + request.myAction);
        if (request.myAction != 'save') {
            return;
        }
        if (chosenFileEntry === null) {
            sendResponse({"result": "No Save File Configured", "Success": "false"});
            return;
        }

        // The payload normally arrives as a JSON string; accept an array too.
        var arrData;
        try {
            arrData = typeof request.myCustomMessage === 'string'
                ? JSON.parse(request.myCustomMessage)
                : request.myCustomMessage;
        } catch (parseError) {
            console.log(parseError);
            arrData = null;
        }
        if (!Array.isArray(arrData)) {
            sendResponse({"result": "Invalid Save Data", "Success": "false"});
            return;
        }

        var csv = toCsv(arrData);
        var failed = false;

        // Failures after this point happen asynchronously, so they are
        // reported through sendReply() like the success confirmation.
        function fail(err) {
            if (failed) {
                return;
            }
            failed = true;
            errorHandler(err);
            sendReply("false", "Something Went Wrong.", request.myIndex);
        }

        appendLog("Saving CSV : ");
        chosenFileEntry.file(function (file) {
            var reader = new FileReader();
            reader.onerror = fail;
            reader.onload = function (e) {
                var contents = e.target.result;
                if (request.myIndex == 1) {
                    // First page of a run: discard whatever the file held before.
                    contents = '';
                    appendLog("Clearing Contents");
                }
                appendLog("Saving Index : " + request.myIndex);
                var output = contents + csv;

                chrome.fileSystem.getWritableEntry(chosenFileEntry, function (writableFileEntry) {
                    if (!writableFileEntry) {
                        fail(chrome.runtime.lastError);
                        return;
                    }
                    writableFileEntry.createWriter(function (writer) {
                        writer.onerror = fail;
                        // Truncate first so no stale bytes survive a shorter
                        // rewrite. write() must wait for truncate() to finish,
                        // hence the chained onwriteend handlers.
                        writer.onwriteend = function () {
                            if (failed) {
                                return;
                            }
                            writer.onwriteend = function () {
                                if (failed) {
                                    return;
                                }
                                appendLog("Save Complete - Sending Message");
                                sendReply("true", "Ok Saved :", request.myIndex);
                            };
                            writer.write(new Blob([output], {type: 'text/plain'}));
                        };
                        writer.truncate(0);
                    }, fail);
                });
            };
            reader.readAsText(file);
        }, fail);
    });
})(window);
manifest.json
- manifest.json
{ "manifest_version": 2, "name": "AG Data Saver Application", "description": "Application to manage the save sink events from the parser.", "version": "1.1", "minimum_chrome_version": "23", "icons": { "16": "icon_16.png" }, "app": { "background": { "scripts": ["main.js"] } }, "permissions": [{"fileSystem": ["write", "retainEntries", "directory"]},"storage"] }
web_browsers/chrome_screen_scraper/chrome_screen_scraper.1468230621.txt.gz · Last modified: 2020/07/15 09:30 (external edit)