This is an old revision of the document!
Table of Contents
Web Browsers - Chrome Screen Scraper
Composed of two parts.
- Parser
- SaverApp
Load the two extension files into Chrome
To load these into Chrome, open the settings menu, choose “More tools” and then “Extensions”.
- Tick the “Developer Mode” box that you see, this will allow you to load unpacked extensions.
- Click on the button “Load unpacked extension” and navigate to the directory where the Parser files exist.
- Click on the button “Load unpacked extension” again and navigate to the directory where the SaverApp files exist.
Once this has been completed, you will see a new icon appear as shown below
This will be used to start the page parsing.
First, though, you will need to launch the App; this is done by clicking on the “Launch” link as shown below.
You need to configure the two parts so that they can communicate with each other; this is done using the IDs that are given to the App and to the Extension.
You should now see a new window appear
You can click in the “my Application ID” box and copy the ID from there (drag the mouse over it while holding the left button down so that it is highlighted in blue, then press CTRL+C to copy it).
You can now click the Speech mark button
To launch the Data Parser Extension
Parser
index.html
- index.html
<!DOCTYPE html>
<html>
<head>
  <meta charset="utf-8">
  <title>Data Parser</title>
  <link href="main.css" rel="stylesheet">
</head>
<body>
  <!-- Popup page of the parser extension. index.js looks these elements up by id. -->
  <h3>Data Parser</h3>
  <!-- Read-only: index.js fills this in with chrome.runtime.id. -->
  <label>Parser ID is <input id="appid" type="text" readonly></label>
  <div>
    <label for="sendId">Destination ID</label>
    <!-- <input> is a void element, so it takes no end tag. -->
    <input id="sendId" type="text">
    <button id="saveID">Save ID</button>
    <button id="process">Start Process</button>
  </div>
  <!-- index.js appends its log lines here. -->
  <div id="log"></div>
  <script src="index.js"></script>
</body>
</html>
main.css
- main.css
/* Every text input on the page gets the same fixed width. */
input[type="text"] {
  width: 240px;
}

/* Log panel: fixed-height box that scrolls vertically and clips horizontally. */
#log {
  margin-top: 10px;
  padding: 10px 20px;
  height: 300px;
  border: 1px solid black;
  background-color: #e2e2fa; /* same colour as rgb(226, 226, 250) */
  overflow-x: hidden;
  overflow-y: scroll;
}
index.js
- index.js
/**
 * Popup script of the Data Parser extension.
 *
 * Flow driven from here:
 *  1. "Start Process" asks the content script in the active tab for the
 *     scraped table ('report_back').
 *  2. The scraped rows are forwarded to the app whose ID is in the
 *     "Destination ID" box, with myAction "save".
 *  3. When the remote side reports back, the content script is asked to
 *     press the page's next button ('process_next').
 *  4. Each webNavigation DOMContentLoaded event while a run is active bumps
 *     the page counter and repeats from step 1.
 *
 * @param {Window} context object that receives the exported appendLog helper
 */
(function (context) {
  document.getElementById("appid").value = chrome.runtime.id;

  // Page counter of the current run; null means no run is in progress.
  var pageIndex = null;

  var sendId = document.getElementById("sendId");
  var logField = document.getElementById("log");
  var save = document.getElementById("saveID");
  var process = document.getElementById("process");

  /**
   * Appends one line to the on-page log panel.
   * @param {*} message value to show; it is string-concatenated as-is
   */
  var appendLog = function (message) {
    logField.innerText += "\n" + message;
  };
  context.appendLog = appendLog;

  console.log('Starting');

  // Restore the destination ID saved by an earlier session.
  chrome.storage.local.get('remoteappid', function (result) {
    console.log(result);
    var remoteAppID = result.remoteappid;
    console.log('In Loop' + remoteAppID);
    // Nothing stored yet: keep the box empty instead of showing "undefined".
    sendId.value = remoteAppID || "";
  });

  /**
   * Sends a message to the content script of the active tab in the current
   * window.
   * @param {Object} message payload for chrome.tabs.sendMessage
   * @param {function(*)} callback receives the content script's response
   */
  function sendToActiveTab(message, callback) {
    chrome.tabs.query({ currentWindow: true, active: true }, function (tabArray) {
      if (!tabArray || tabArray.length === 0) {
        appendLog("No active tab found.");
        return;
      }
      chrome.tabs.sendMessage(tabArray[0].id, message, callback);
    });
  }

  /** Asks the active tab for its table content, tagged with the current page index. */
  function requestPageContent() {
    sendToActiveTab({ text: 'report_back', pageIndex: pageIndex }, doStuffWithDom);
  }

  /**
   * Shared handling of a result reported by the remote app.
   * @param {boolean} failed true when the remote side flagged an error
   * @param {*} label text logged as the remote message / error
   * @param {*} detail text logged after the next page has been requested
   */
  function handleRemoteResult(failed, label, detail) {
    appendLog("Remote Message : " + label);
    if (failed) {
      console.log("Remote Error : " + label);
      appendLog("Remote Error : " + label);
      // Stop the run; otherwise a later page load would resume scraping.
      pageIndex = null;
      return;
    }
    sendToActiveTab({ text: 'process_next' }, dealWithNextButton);
    console.log(detail);
    appendLog(detail);
  }

  save.addEventListener('click', function () {
    var remoteID = sendId.value;
    appendLog("Saved");
    chrome.storage.local.set({ 'remoteappid': remoteID });
  });

  process.addEventListener('click', function () {
    pageIndex = 1;
    requestPageContent();
  });

  /**
   * Forwards the content script's scrape result to the remote app.
   * @param {{domContent: Array, pageIndex: number}|undefined} res response
   *        of the content script; undefined when the tab did not answer
   */
  function doStuffWithDom(res) {
    if (!res) {
      var tabError = chrome.runtime.lastError;
      appendLog("No response from page" + (tabError ? " : " + tabError.message : "."));
      return;
    }
    chrome.runtime.sendMessage(
      sendId.value,
      { myCustomMessage: JSON.stringify(res.domContent), myAction: "save", myIndex: res.pageIndex },
      function (response) {
        if (!response) {
          // The receiver may close the channel without a direct response and
          // report back through a separate external message instead (handled
          // by the onMessageExternal listener below). Reading lastError here
          // keeps Chrome from flagging it as unchecked.
          var sendError = chrome.runtime.lastError;
          console.log("No direct response" + (sendError ? " : " + sendError.message : ""));
          return;
        }
        handleRemoteResult(response.Success === "false", response.result, response.result);
      }
    );
  }

  // Result messages pushed by the remote app once it has finished its work.
  chrome.runtime.onMessageExternal.addListener(function (request, sender, sendResponse) {
    handleRemoteResult(request.result === "false", request.myResultAction, request.result);
  });

  /**
   * Handles the content script's answer to 'process_next'.
   * @param {{success: boolean}|undefined} res success is false when the page
   *        has no next button
   */
  function dealWithNextButton(res) {
    if (!res) {
      var tabError = chrome.runtime.lastError;
      appendLog("No response from page" + (tabError ? " : " + tabError.message : "."));
      return;
    }
    if (res.success === false) {
      appendLog("Finished.");
      pageIndex = null;
    } else {
      appendLog("Still Processing.");
    }
  }

  // NOTE(review): this event is not filtered by tab or frame, so any
  // DOMContentLoaded reported while a run is active advances the counter.
  chrome.webNavigation.onDOMContentLoaded.addListener(function (details) {
    if (pageIndex != null) {
      pageIndex += 1;
      requestPageContent();
    }
  });
})(window)
content.js
- content.js
/**
 * Content script: answers requests from the extension popup.
 *
 *  - 'report_back'  : scrape the table named "table1" and respond with
 *                     {domContent, tableContent, pageIndex}.
 *  - 'process_next' : click the element named "nextButton" when present and
 *                     respond with {success: true|false}.
 *
 * Relies on jQuery ($) being loaded alongside this script.
 */
chrome.runtime.onMessage.addListener(function (msg, sender, sendResponse) {
  console.log('got here');

  // If the received message has the expected format...
  if (msg.text === 'report_back') {
    var result = [];

    // One object per table row: 'id' is the 1-based row position, the keys
    // 0..n hold the text of each cell. A page without the table yields an
    // empty list instead of an exception, so the caller always gets an answer.
    $('table[name=table1]>tbody>tr').each(function (id) {
      var cells = $(this).find('td');
      if (cells.length === 0) {
        return; // rows without <td> cells (e.g. header rows) are skipped
      }
      var row = { 'id': id + 1 };
      cells.each(function (index) {
        row[index] = $(this).text();
      });
      result.push(row);
    });

    sendResponse({ 'domContent': result, 'tableContent': result, 'pageIndex': msg.pageIndex });
  }

  if (msg.text === 'process_next') {
    var nextButton = $('[name="nextButton"]');
    if (nextButton.length > 0) {
      nextButton.trigger("click");
      sendResponse({ 'success': true });
    } else {
      // No next button on this page: tell the caller there is nothing left.
      sendResponse({ 'success': false });
    }
  }
});
eventPage.js
- eventPage.js
// Not consulted by the listener below; kept so the global stays available.
var blacklistedIds = ["none"];

/**
 * Acknowledges result messages sent by other apps/extensions.
 *
 * Logging goes to the console: the appendLog helper is attached to the popup
 * page's window by index.js and is not defined in this script, so calling it
 * here would raise a ReferenceError before any response is sent.
 */
chrome.runtime.onMessageExternal.addListener(function (request, sender, sendResponse) {
  console.log("MSG RCV : " + request.myResultAction + ' ' + request.myResultIndex);

  if (request.myResultAction === 'Ok Saved :') {
    console.log(request.myResultAction + ' ' + request.myResultIndex);
    sendResponse({ "result": "Ok, got your message" });
  } else {
    sendResponse({ "result": "Ops, I don't understand this message :" + request.myResultAction });
  }
});
manifest.json
- manifest.json
{
  "manifest_version": 2,
  "name": "AG Data Parser Extension",
  "version": "1.1",
  "description": "Extension to parse pages and send them to the save sink.",
  "browser_action": {
    "default_title": "Send message to other apps",
    "default_icon": "icon_16.png",
    "default_popup": "index.html"
  },
  "background": {
    "scripts": ["eventPage.js"],
    "persistent": false
  },
  "content_scripts": [{
    "matches": ["<all_urls>"],
    "js": ["jquery.js", "content.js"],
    "run_at": "document_end"
  }],
  "permissions": ["activeTab", "notifications", "storage", "webNavigation"]
}
Save Sink Events
index.html
- index.html
<!DOCTYPE html>
<html>
<head>
  <meta charset="utf-8">
  <title>Data Saver</title>
  <link href="main.css" rel="stylesheet">
</head>
<body>
  <!--
    Window of the saver app. The ids below are the ones the saver's index.js
    looks up: appid, sendId, saveId, selectSave, clearLog and log.
  -->
  <h3>Data Saver</h3>
  <!-- Read-only: index.js fills this in with chrome.runtime.id. -->
  <label>my Application ID <input id="appid" type="text" readonly></label>
  <div>
    <label for="sendId">Destination ID</label>
    <input id="sendId" type="text">
    <button id="saveId">Save ID</button>
  </div>
  <div>
    <button id="selectSave">Select Save File</button>
    <button id="clearLog">Clear Log</button>
  </div>
  <!-- index.js appends its log lines here. -->
  <div id="log"></div>
  <script src="index.js"></script>
</body>
</html>
main.css
- main.css
/* Every text input on the page gets the same fixed width. */
input[type="text"] {
  width: 240px;
}

/* Log panel: fixed-height box that scrolls vertically and clips horizontally. */
#log {
  margin-top: 10px;
  padding: 10px 20px;
  height: 300px;
  border: 1px solid black;
  background-color: #e2e2fa; /* same colour as rgb(226, 226, 250) */
  overflow-x: hidden;
  overflow-y: scroll;
}
index.js
- index.js
/**
 * Window script of the Data Saver app.
 *
 * Receives {myAction: "save", myCustomMessage, myIndex} messages from another
 * extension, converts the rows to CSV and stores them in the file chosen with
 * the "selectSave" button. Page index 1 starts the file afresh; later pages
 * are appended to what the file already holds.
 *
 * The outcome of a save is NOT delivered through sendResponse: once the file
 * has been written, a separate message is sent to the ID in the
 * "Destination ID" box (see sendReply).
 *
 * @param {Window} context object that receives the exported appendLog helper
 */
(function (context) {
  document.getElementById("appid").value = chrome.runtime.id;

  var logField = document.getElementById("log");
  var selectSave = document.getElementById("selectSave");
  var sendId = document.getElementById("sendId");
  var clearLog = document.getElementById("clearLog");
  var saveId = document.getElementById("saveId");

  // File entry picked by the user; null until a save file has been selected.
  var chosenFileEntry = null;

  /**
   * Appends one line to the on-page log panel.
   * @param {*} message value to show; it is string-concatenated as-is
   */
  var appendLog = function (message) {
    logField.innerText += "\n" + message;
  };
  context.appendLog = appendLog;

  // Restore the destination ID saved by an earlier session.
  chrome.storage.local.get('remoteappid', function (result) {
    // Nothing stored yet: keep the box empty instead of showing "undefined".
    sendId.value = result.remoteappid || "";
  });

  saveId.addEventListener('click', function () {
    var remoteID = sendId.value;
    appendLog("Saved");
    chrome.storage.local.set({ 'remoteappid': remoteID });
  });

  clearLog.addEventListener('click', function () {
    logField.innerText = '';
  });

  /**
   * Generic failure callback for the file APIs: records the error locally.
   * @param {*} obj error object or event handed over by the failing API
   */
  function errorHandler(obj) {
    console.log(obj);
    appendLog("Something Went Wrong.");
  }

  selectSave.addEventListener('click', function () {
    chrome.fileSystem.chooseEntry({ type: 'saveFile', suggestedName: 'output.csv' }, function (writableFileEntry) {
      if (!writableFileEntry) {
        // The chooser was dismissed; reading lastError marks it as handled.
        var chooseError = chrome.runtime.lastError;
        appendLog("No file selected" + (chooseError ? " : " + chooseError.message : "."));
        return;
      }
      chosenFileEntry = writableFileEntry;
      writableFileEntry.createWriter(function (writer) {
        writer.onerror = errorHandler;
        // Start from an empty file. No write is issued here: a FileWriter
        // rejects write() while the truncate is still pending, and the first
        // page of a run replaces the file content anyway.
        writer.truncate(0);
      }, errorHandler);
    });
  });

  /**
   * Quotes a CSV field when it contains a comma, a double quote or a line
   * break, doubling embedded quotes; other values pass through unchanged.
   * @param {*} value cell value
   * @returns {string} text that is safe to place between commas
   */
  function escapeCsvField(value) {
    var text = String(value);
    if (/[",\r\n]/.test(text)) {
      return '"' + text.replace(/"/g, '""') + '"';
    }
    return text;
  }

  /**
   * Serialises row objects to CSV: one CRLF-terminated line per row, values
   * in the objects' own key enumeration order, no trailing comma.
   * @param {Array<Object>} rows
   * @returns {string}
   */
  function toCsv(rows) {
    return rows.map(function (row) {
      var cells = Object.keys(row).map(function (key) {
        return escapeCsvField(row[key]);
      });
      return cells.join(',') + '\r\n';
    }).join('');
  }

  /**
   * Sends a result message to the app/extension named in the Destination ID box.
   * @param {string} myResult "true" or "false"
   * @param {string} myResultAction human-readable status text
   * @param {*} pageIndex index of the page the result belongs to
   */
  function sendReply(myResult, myResultAction, pageIndex) {
    chrome.runtime.sendMessage(
      sendId.value,
      { result: myResult, myResultAction: myResultAction, myResultIndex: pageIndex },
      function (response) {
        var replyError = chrome.runtime.lastError;
        if (replyError) {
          console.log("reply error: " + replyError.message);
        }
        console.log("response: " + JSON.stringify(response));
        appendLog("response: " + JSON.stringify(response));
      }
    );
  }

  chrome.runtime.onMessageExternal.addListener(function (request, sender, sendResponse) {
    if (!request || !request.myAction) {
      sendResponse({ "result": "Ops, I don't understand this message :" + JSON.stringify(request) });
      return;
    }

    appendLog("Action from " + sender.id + ": " + request.myAction);
    if (request.myAction !== 'save') {
      return;
    }
    if (chosenFileEntry == null) {
      sendResponse({ "result": "No Save File Configured", "Success": "false" });
      return;
    }

    // Set once a failure has been reported so no success message follows it.
    var failed = false;

    // Logs the failure and tells the sender that this page was not saved.
    function fail(obj) {
      if (failed) {
        return;
      }
      failed = true;
      errorHandler(obj);
      sendReply("false", "Something Went Wrong.", request.myIndex);
    }

    // The payload normally arrives as a JSON string; accept parsed data too.
    var payload = request.myCustomMessage;
    var arrData;
    try {
      arrData = typeof payload === 'string' ? JSON.parse(payload) : payload;
    } catch (parseError) {
      fail(parseError);
      return;
    }
    if (!Array.isArray(arrData)) {
      fail("Payload is not a list of rows");
      return;
    }

    var CSV = toCsv(arrData);
    appendLog("Saving CSV : ");

    chosenFileEntry.file(function (file) {
      var reader = new FileReader();
      reader.onerror = fail;
      reader.onload = function (e) {
        var contents = e.target.result;
        if (Number(request.myIndex) === 1) {
          // First page of a run: discard whatever the file held before.
          contents = '';
          appendLog("Clearing Contents");
        }
        appendLog("Saving Index : " + request.myIndex);

        var blob = new Blob([contents + CSV], { type: 'text/plain' });
        chrome.fileSystem.getWritableEntry(chosenFileEntry, function (writableFileEntry) {
          if (!writableFileEntry) {
            fail(chrome.runtime.lastError);
            return;
          }
          writableFileEntry.createWriter(function (writer) {
            writer.onerror = fail;
            writer.onwriteend = function () {
              if (failed) {
                return;
              }
              // The write started at offset 0; cut the file to the new length
              // so bytes of longer, older content do not survive.
              writer.onwriteend = function () {
                if (failed) {
                  return;
                }
                appendLog("Save Complete - Sending Message");
                sendReply("true", "Ok Saved :", request.myIndex);
              };
              writer.truncate(blob.size);
            };
            writer.write(blob);
          }, fail);
        });
      };
      reader.readAsText(file);
    }, fail);
  });
})(window)
manifest.json
- manifest.json
{
  "manifest_version": 2,
  "name": "AG Data Saver Application",
  "description": "Application to manage the save sink events from the parser.",
  "version": "1.1",
  "minimum_chrome_version": "23",
  "icons": {
    "16": "icon_16.png"
  },
  "app": {
    "background": {
      "scripts": ["main.js"]
    }
  },
  "permissions": [
    {
      "fileSystem": ["write", "retainEntries", "directory"]
    },
    "storage"
  ]
}