web_browsers:chrome_screen_scraper:chrome_screen_scraper
This is an old revision of the document!
Table of Contents
Web Browsers - Chrome Screen Scraper
Parser
index.html
- index.html
<!DOCTYPE html>
<!--
  Popup page of the Data Parser extension.
  Shows this extension's own ID, lets the user store the ID of the
  destination ("save sink") extension, starts the scraping process and
  displays a running log. All behaviour lives in index.js.
-->
<html lang="en">
<head>
  <meta charset="utf-8">
  <title>Data Parser</title>
  <link href="main.css" rel="stylesheet">
</head>
<body>
  <h3>Data Parser</h3>

  <!-- Read-only: filled in by index.js with chrome.runtime.id. -->
  <!-- <input> is a void element, so it must not have an end tag. -->
  <label>Parser ID is <input id="appid" type="text" readonly></label>

  <div>
    <label for="sendId">Destination ID</label>
    <input id="sendId" type="text">
    <button id="saveID">Save ID</button>
    <button id="process">Start Process</button>
  </div>

  <!-- index.js appends one line per log message here. -->
  <div id="log"></div>

  <script src="index.js"></script>
</body>
</html>
main.css
- main.css
/* Give every text field in the popup the same fixed width. */
input[type="text"] {
  width: 240px;
}

/* Fixed-height log panel: scrolls vertically, never horizontally. */
#log {
  height: 300px;
  margin-top: 10px;
  padding: 10px 20px;
  border: 1px solid black;
  background-color: rgb(226, 226, 250);
  overflow-x: hidden;
  overflow-y: scroll;
}
index.js
- index.js
/**
 * Popup controller for the Data Parser extension.
 *
 * Flow implemented below:
 *  1. "Start Process" asks the content script of the active tab for the
 *     current page's data ('report_back').
 *  2. The returned data is forwarded to the extension whose ID is in the
 *     "Destination ID" field (message with myAction "save").
 *  3. Unless the destination reports a failure, the content script is asked
 *     to go to the next page ('process_next').
 *  4. While a run is active (pageIndex is not null), every
 *     webNavigation.onDOMContentLoaded event bumps pageIndex and requests
 *     the page data again.
 *
 * @param {Window} context - object on which appendLog is exposed.
 */
(function (context) {
  document.getElementById("appid").value = chrome.runtime.id;

  var remoteAppID = "";
  // Index of the page being scraped; null means no run is in progress.
  var pageIndex = null;
  console.log('Starting');

  var sendId = document.getElementById("sendId");
  var logField = document.getElementById("log");
  var save = document.getElementById("saveID");
  var process = document.getElementById("process");

  /**
   * Append one line to the on-screen log.
   * @param {string} message - text to append.
   */
  function appendLog(message) {
    logField.innerText += "\n" + message;
  }

  /**
   * Return the message of chrome.runtime.lastError, or a fallback text.
   * Reading lastError inside the callback also marks it as handled.
   * @param {string} fallback - text used when no error message is available.
   * @returns {string}
   */
  function describeLastError(fallback) {
    var err = chrome.runtime.lastError;
    return err && err.message ? err.message : fallback;
  }

  /**
   * Look up the active tab of the current window and hand it to a callback.
   * Logs an error instead of throwing when no tab is returned.
   * @param {function(Object)} tabCallback - receives the active tab.
   */
  function doInCurrentTab(tabCallback) {
    chrome.tabs.query(
      { currentWindow: true, active: true },
      function (tabArray) {
        var activeTab = tabArray && tabArray[0];
        if (!activeTab) {
          appendLog("Error : no active tab found");
          return;
        }
        tabCallback(activeTab);
      }
    );
  }

  /**
   * Send a message to the content script of the active tab.
   * @param {Object} message - payload for the content script.
   * @param {function(*)} onResponse - called with the content script's reply.
   */
  function sendToActiveTab(message, onResponse) {
    doInCurrentTab(function (activeTab) {
      chrome.tabs.sendMessage(activeTab.id, message, onResponse);
    });
  }

  /** Ask the content script for the data of the page with the given index. */
  function requestPageData(index) {
    sendToActiveTab({text: 'report_back', pageIndex: index}, doStuffWithDom);
  }

  /** Ask the content script to move on to the next page. */
  function requestNextPage() {
    sendToActiveTab({text: 'process_next'}, dealWithNextButton);
  }

  // Pre-fill the destination field with the ID saved earlier, if any.
  chrome.storage.local.get('remoteappid', function (result) {
    console.log(result);
    remoteAppID = result.remoteappid;
    console.log('In Loop' + remoteAppID);
    // Nothing stored yet: keep the field empty rather than showing "undefined".
    sendId.value = remoteAppID || "";
  });

  save.addEventListener('click', function () {
    var remoteID = sendId.value;
    chrome.storage.local.set({'remoteappid': remoteID}, function () {
      if (chrome.runtime.lastError) {
        appendLog("Error : " + describeLastError("could not save ID"));
        return;
      }
      appendLog("Saved");
    });
  });

  process.addEventListener('click', function () {
    pageIndex = 1;
    requestPageData(pageIndex);
  });

  /**
   * Forward the page data returned by the content script to the destination
   * extension and, unless it reports a failure, request the next page.
   * @param {{domContent: *, pageIndex: number}|undefined} res - content
   *     script reply; undefined when the message could not be delivered.
   */
  function doStuffWithDom(res) {
    if (!res) {
      appendLog("Error : no page data received (" + describeLastError("no response") + ")");
      return;
    }
    var domContent = res.domContent;
    var reportedIndex = res.pageIndex;

    chrome.runtime.sendMessage(
      sendId.value,
      {myCustomMessage: JSON.stringify(domContent), myAction: "save", myIndex: reportedIndex},
      function (response) {
        if (!response) {
          // The destination is missing or did not answer.
          appendLog("Remote Error : " + describeLastError("no response"));
          return;
        }
        appendLog("Remote Message : " + response.result);
        if (response.Success === "false") {
          console.log("Remote Error : " + response.result);
          appendLog("Remote Error : " + response.result);
        } else {
          requestNextPage();
          console.log(response.result);
          appendLog(response.result);
        }
      }
    );
  }

  // Messages pushed to the popup by other extensions (e.g. the save sink).
  chrome.runtime.onMessageExternal.addListener(
    function (request, sender, sendResponse) {
      appendLog("Remote Message : " + request.myResultAction);
      if (request.result === "false") {
        console.log("Remote Error : " + request.myResultAction);
        appendLog("Remote Error : " + request.myResultAction);
      } else {
        requestNextPage();
        console.log(request.result);
        appendLog(request.result);
      }
    }
  );

  /**
   * Handle the content script's reply to 'process_next'.
   * A reply with success === false ends the run.
   * @param {{success: boolean}|undefined} res - content script reply.
   */
  function dealWithNextButton(res) {
    if (!res) {
      // Leave pageIndex untouched: the reply can be lost while the page is
      // navigating, and the run state should not change in that case.
      appendLog("Error : no answer to next-page request (" + describeLastError("no response") + ")");
      return;
    }
    if (res.success === false) {
      appendLog("Finished.");
      pageIndex = null;
    } else {
      appendLog("Still Processing.");
    }
  }

  // NOTE(review): this listener is not filtered by tab or frame, so any
  // DOMContentLoaded event seen while a run is active advances pageIndex.
  // Confirm whether details.tabId / details.frameId should be checked.
  chrome.webNavigation.onDOMContentLoaded.addListener(function (details) {
    if (pageIndex !== null) {
      pageIndex += 1;
      requestPageData(pageIndex);
    }
  });

  context.appendLog = appendLog;
})(window);
content.js
- content.js
// Content script: answers requests sent by the extension popup.
//   'report_back'  -> reply with the cell texts of table "table1"
//   'process_next' -> click the element named "nextButton", if present
chrome.runtime.onMessage.addListener(function (msg, sender, sendResponse) {
  console.log('got here');

  if (msg.text === 'report_back') {
    // One entry per table row that contains at least one <td>. 'id' is the
    // 1-based position of the <tr>; keys 0..n hold the cell texts.
    // A page without the table yields an empty list instead of an exception,
    // so the popup always receives a reply.
    var result = [];
    $('table[name=table1]>tbody>tr').each(function (id) {
      var cells = $(this).find('td');
      if (cells.length !== 0) {
        var row = {'id': id + 1};
        cells.each(function (index) {
          row[index] = $(this).text();
        });
        result.push(row);
      }
    });
    // pageIndex is echoed back unchanged from the request.
    sendResponse({'domContent': result, 'tableContent': result, 'pageIndex': msg.pageIndex});
  }

  if (msg.text === 'process_next') {
    var nextButton = $('[name="nextButton"]');
    if (nextButton.length > 0) {
      nextButton.trigger("click");
      sendResponse({'success': true});
    } else {
      // No next button on this page: tell the popup there is nothing more.
      sendResponse({'success': false});
    }
  }
});
eventPage.js
- eventPage.js
var blacklistedIds = ["none"];

// Background event page: receives messages sent by other extensions and
// acknowledges them.
chrome.runtime.onMessageExternal.addListener(
  function (request, sender, sendResponse) {
    // The manifest loads only this script into the background page, and this
    // script defines no appendLog helper, so the original appendLog calls
    // would throw before sendResponse ran. Log to the console instead.
    console.log("MSG RCV : " + request.myResultAction + ' ' + request.myResultIndex);
    if (request.myResultAction === 'Ok Saved :') {
      console.log(request.myResultAction + ' ' + request.myResultIndex);
      sendResponse({"result": "Ok, got your message"});
    } else {
      sendResponse({"result": "Ops, I don't understand this message :" + request.myResultAction});
    }
  }
);
manifest.json
- manifest.json
{
  "name": "AG Data Parser Extension",
  "version": "1.1",
  "description": "Extension to parse pages and send them to the save sink.",
  "browser_action": {
    "default_title": "Send message to other apps",
    "default_icon": "icon_16.png",
    "default_popup": "index.html"
  },
  "background": {
    "scripts": ["eventPage.js"],
    "persistent": false
  },
  "content_scripts": [
    {
      "matches": ["<all_urls>"],
      "js": ["content.js", "jquery.js"],
      "run_at": "document_end"
    }
  ],
  "permissions": [
    "activeTab",
    "notifications",
    "storage",
    "webNavigation"
  ],
  "manifest_version": 2
}
Save Sink Events
web_browsers/chrome_screen_scraper/chrome_screen_scraper.1468229317.txt.gz · Last modified: 2020/07/15 09:30 (external edit)