User Tools

Site Tools


web_browsers:chrome_screen_scraper:chrome_screen_scraper

This is an old revision of the document!


Web Browsers - Chrome Screen Scraper

The screen scraper is composed of two parts:

  • Parser
  • SaverApp

Load the two extension files into Chrome

To load these into Chrome, open the Chrome menu, choose “More tools” and then “Extensions”.

  • Tick the “Developer Mode” box. This allows you to load unpacked extensions.
  • Click on the button “Load unpacked extension” and navigate to the directory where the Parser files exist.
  • Click on the button “Load unpacked extension” and navigate to the directory where the SaverApp files exist.

Once this has been completed, you will see a new icon appear, as shown below.

Parser

index.html

index.html
<!DOCTYPE html>
<!-- Popup UI of the Data Parser extension; all behaviour is wired up by index.js. -->
<html>
<head>
  <meta charset="utf-8">
  <link href="main.css" rel="stylesheet">
</head>

<body>
  <h3>Data Parser</h3>
  <!-- Read-only field that index.js fills with chrome.runtime.id.
       <input> is a void element, so it takes no closing tag. -->
  <label>Parser ID is <input id="appid" type="text" readonly></label>
  <div>
    <label for="sendId">Destination ID</label>
    <input id="sendId" type="text">

    <button id="saveID">Save ID</button>
    <button id="process">Start Process</button>
  </div>
  <!-- index.js appends its status lines to this element. -->
  <div id="log"></div>

  <script src="index.js"></script>
</body>

</html>

main.css

main.css
/* Every text input gets the same fixed width. */
input[type="text"] {
  width: 240px;
}

/* Log panel: fixed-height box that scrolls vertically and hides horizontal overflow. */
#log {
  height: 300px;
  margin-top: 10px;
  padding: 10px 20px;
  border: 1px solid black;
  background-color: #e2e2fa;
  overflow-x: hidden;
  overflow-y: scroll;
}

index.js

index.js
(function(context){
  // Show this extension's own ID so it can be copied into the saver app.
  document.getElementById("appid").value=chrome.runtime.id;
  var remoteAppID ="";
  // null means no scrape is running; otherwise the 1-based index of the page being scraped.
  var pageIndex=null;
  console.log('Starting');
  var sendId=document.getElementById("sendId");

  // Restore the destination ID saved by an earlier session.
  chrome.storage.local.get('remoteappid',function (result) {
    console.log(result);
    // Nothing is stored on first run; fall back to "" so the field does not display "undefined".
    remoteAppID=(result && result.remoteappid) || "";
    console.log('In Loop' +remoteAppID);
    sendId.value=remoteAppID;
  });

  var logField = document.getElementById("log");
  var save=document.getElementById("saveID");
  var process=document.getElementById("process");

  // "Save ID": persist the destination ID typed by the user.
  save.addEventListener('click', function() {
    // Surrounding whitespace from a copy/paste would make the ID unusable as a message target.
    var remoteID=sendId.value.trim();
    sendId.value=remoteID;
    chrome.storage.local.set({'remoteappid': remoteID}, function() {
      // Only report success once the write has actually completed.
      if (chrome.runtime.lastError) {
        appendLog("Save failed : " + chrome.runtime.lastError.message);
        return;
      }
      appendLog("Saved");
    });
  });

  // "Start Process": ask the content script in the active tab for the first page of data.
  process.addEventListener('click', function() {
    chrome.tabs.query(
      { currentWindow: true, active: true },
      function (tabArray) {
        var activeTab = tabArray && tabArray[0];
        if (!activeTab) {
          appendLog("No active tab to process.");
          return;
        }
        // Mark the scrape as running only once there is a tab to scrape.
        pageIndex=1;
        chrome.tabs.sendMessage(activeTab.id, {text: 'report_back',pageIndex: pageIndex}, doStuffWithDom);
      }
    );
  });
 
  /**
   * Callback for the content script's 'report_back' reply. Forwards the scraped
   * rows to the app whose ID is in the Destination ID field and handles a direct
   * answer from it, if there is one.
   *
   * @param {{domContent: Array, pageIndex: number}=} res Reply from the page;
   *     undefined when nothing in the tab answered the message.
   */
  function doStuffWithDom(res) {
    if (!res) {
      // sendMessage calls back without an argument when the tab has no listener.
      appendLog("No reply from page.");
      return;
    }

    chrome.runtime.sendMessage(
      sendId.value,
      {myCustomMessage: JSON.stringify(res.domContent),myAction : "save",myIndex : res.pageIndex},
      function(response) {
        if (!response) {
          // No direct answer. The saver shown alongside this file reports a
          // successful save through a separate external message, so an empty
          // response is expected on success; just record why it was empty.
          if (chrome.runtime.lastError) {
            console.log("No direct reply from saver: " + chrome.runtime.lastError.message);
          }
          return;
        }

        appendLog("Remote Message : " + response.result);
        if (response.Success=="false"){
          console.log("Remote Error : " + response.result);
          appendLog("Remote Error : " + response.result);
          return;
        }

        // Direct non-failure answer: ask the page to move on to the next one.
        chrome.tabs.query(
          { currentWindow: true, active: true },
          function (tabArray) {
            var activeTab = tabArray && tabArray[0];
            if (!activeTab) {
              appendLog("No active tab to process.");
              return;
            }
            chrome.tabs.sendMessage(activeTab.id, {text: 'process_next'},dealWithNextButton);
          }
        );
        console.log(response.result);
        appendLog(response.result);
      }
    );
  }
 
    // Messages pushed by another extension/app (e.g. the saver's save report):
    // log them and, unless they signal a failure, ask the page to advance.
    chrome.runtime.onMessageExternal.addListener(
      function(request, sender, sendResponse) {
        var resultAction = request.myResultAction;
        appendLog("Remote Message : " + resultAction);

        if (request.result=="false"){
          var errorLine = "Remote Error : " + resultAction;
          console.log(errorLine);
          appendLog(errorLine);
          return;
        }

        chrome.tabs.query(
          { currentWindow: true, active: true },
          function (tabs) {
            chrome.tabs.sendMessage(tabs[0].id, {text: 'process_next'},dealWithNextButton);
          }
        );
        console.log(request.result);
        appendLog(request.result);
      }
    );
 
    /**
     * Callback for the content script's 'process_next' reply.
     * A reply with success === false ends the scrape (pageIndex is reset to
     * null); any other reply means a next page was requested.
     *
     * @param {{success: boolean}=} res Reply from the page; undefined when
     *     nothing in the tab answered the message.
     */
    function dealWithNextButton(res){
      if (!res){
        // Without a reply there is nothing to decide on; leave the scrape state untouched.
        appendLog("No reply from page.");
        return;
      }
      if (res.success===false){
        appendLog("Finished.");
        pageIndex=null;
      }else{
        appendLog("Still Processing.");
      }
    }
 
    // While a scrape is running, every newly loaded page in the active tab is
    // asked for its data under the next page index.
    chrome.webNavigation.onDOMContentLoaded.addListener(function (details) {
      // frameId 0 is the tab's main frame. Sub-frames raise this event too and
      // must not advance the page counter.
      if (pageIndex == null || details.frameId !== 0){
        return;
      }
      chrome.tabs.query(
        { currentWindow: true, active: true },
        function (tabArray) {
          var activeTab = tabArray && tabArray[0];
          // Skip loads that happened in another tab, and scrapes that ended
          // while the tab query was pending.
          if (!activeTab || activeTab.id !== details.tabId || pageIndex == null){
            return;
          }
          pageIndex+=1;
          chrome.tabs.sendMessage(activeTab.id, {text: 'report_back',pageIndex: pageIndex}, doStuffWithDom);
        }
      );
    });
 
  // Empty placeholder: the body does nothing and no code in this file calls it.
  // NOTE(review): presumably intended to wrap the repeated "query the active
  // tab, then act on it" pattern used above — confirm, then implement or remove.
  function doInCurrentTab(tabCallback) {
  }
 
 
  // Add one line of text to the end of the on-screen log panel.
  var appendLog = function(message) {
    var existing = logField.innerText;
    logField.innerText = existing + "\n" + message;
  };
 
  context.appendLog = appendLog;
})(window)

content.js

content.js
  // Content script: answers the two requests sent by the extension popup.
  //   'report_back'  -> reply with the cell text of the table named "table1"
  //   'process_next' -> click the element named "nextButton", reply whether it exists
  chrome.runtime.onMessage.addListener(function (msg, sender, sendResponse) {
    console.log('got here');

    if (msg.text === 'report_back') {
      // One object per table row: keys 0..n hold the cell texts, 'id' the
      // 1-based position of the row inside <tbody>. A page without the table
      // yields an empty list instead of an exception, so a reply is always sent.
      var result = [];
      $('table[name=table1]>tbody>tr').each(function(id){
        var cells = $(this).find('td');
        if (cells.length === 0){
          return; // rows without <td> cells (e.g. header rows) carry no data
        }
        var row = {'id': id+1};
        cells.each(function(index){
          row[index] = $(this).text();
        });
        result.push(row);
      });

      // The same rows are sent under both keys, as before.
      sendResponse({'domContent': result,'tableContent': result,'pageIndex':msg.pageIndex});
    }

    if (msg.text === 'process_next') {
      var nextButton = $('[name="nextButton"]');
      if (nextButton.length>0){
        nextButton.trigger( "click" );
        sendResponse({'success':true});
      }else{
        // No next button on this page: tell the popup there is nothing more to do.
        sendResponse({'success':false});
      }
    }
  });

eventPage.js

eventPage.js
// NOTE(review): declared but never consulted by the listener below — confirm
// whether sender filtering was intended.
var blacklistedIds = ["none"];

// The popup's appendLog belongs to the popup window and does not exist in this
// background page, so calling it here raised a ReferenceError on every message.
// Log to the background page's console instead.
function appendLog(message) {
  console.log(message);
}

// Acknowledge save reports sent by other extensions/apps.
chrome.runtime.onMessageExternal.addListener(
  function(request, sender, sendResponse) {
    appendLog("MSG RCV : " + request.myResultAction+' ' +request.myResultIndex);
    if (request.myResultAction=='Ok Saved :') {
      appendLog(request.myResultAction+' ' +request.myResultIndex);
      sendResponse({"result":"Ok, got your message"});
    } else {
      sendResponse({"result":"Ops, I don't understand this message :" + request.myResultAction});
    }
  }
);

manifest.json

manifest.json
{
  "name": "AG Data Parser Extension",
  "version": "1.1",
  "description": "Extension to parse pages and send them to the save sink.",
  "browser_action": {
    "default_title": "Send message to other apps",
    "default_icon": "icon_16.png",
    "default_popup": "index.html"
  },
  "background": {
    "scripts": ["eventPage.js"],
    "persistent": false
  },
  "content_scripts": [{
    "matches": ["<all_urls>"],
    "js": ["jquery.js","content.js"],
    "run_at": "document_end"
  }],
  "permissions": [ "activeTab", "notifications","storage","webNavigation"],
  "manifest_version": 2
}

Save Sink Events

index.html

index.html
<!DOCTYPE html>

<!-- Window UI of the Data Saver application; all behaviour is wired up by index.js. -->
<html>
<head>
  <meta charset="utf-8">
  <link href="main.css" rel="stylesheet">
</head>

<body>
  <h3>Data Saver</h3>
  <!-- Read-only field that index.js fills with chrome.runtime.id.
       <input> is a void element, so it takes no closing tag. -->
  <label>Saver ID is <input id="appid" type="text" readonly></label>
  <div>
    <label for="sendId">Destination ID</label>
    <input id="sendId" type="text">
    <!-- index.js looks these three buttons up by id (saveId, selectSave, clearLog)
         and attaches a click handler to each; they must all exist. -->
    <button id="saveId">Save ID</button>
    <button id="selectSave">Select Save File</button>
    <button id="clearLog">Clear Log</button>
  </div>

  <!-- index.js appends its status lines to this element. -->
  <div id="log"></div>

  <script src="index.js"></script>
</body>
</html>

main.css

main.css
/* Every text input gets the same fixed width. */
input[type="text"] {
  width: 240px;
}

/* Log panel: fixed-height box that scrolls vertically and hides horizontal overflow. */
#log {
  height: 300px;
  margin-top: 10px;
  padding: 10px 20px;
  border: 1px solid black;
  background-color: #e2e2fa;
  overflow-x: hidden;
  overflow-y: scroll;
}

index.js

index.js
(function(context){
  // Show this app's own ID so it can be entered as the parser's destination.
  document.getElementById("appid").value=chrome.runtime.id;

  var remoteAppID ="";

  var logField = document.getElementById("log");
  var selectSave=document.getElementById("selectSave");
  var sendId=document.getElementById("sendId");
  var clearLog=document.getElementById("clearLog");
  var saveId=document.getElementById("saveId");

  // Restore the destination ID saved by an earlier session.
  chrome.storage.local.get('remoteappid',function (result) {
    // Nothing is stored on first run; fall back to "" so the field does not display "undefined".
    remoteAppID=(result && result.remoteappid) || "";
    sendId.value=remoteAppID;
  });

  // "Save ID": persist the destination ID typed by the user.
  saveId.addEventListener('click', function() {
    var remoteID=sendId.value;
    appendLog("Saved");
    chrome.storage.local.set({'remoteappid': remoteID});
  });

  // File entry picked through the save dialog; null until one has been chosen.
  var chosenFileEntry=null;

  clearLog.addEventListener('click', function() {
    logField.innerText='';
  });

  // Shared failure callback for the file-system calls. Declared with var so it
  // stays inside this closure, and it only logs: no sendResponse function is
  // in scope here, so calling one raised a ReferenceError and hid the real error.
  var errorHandler = function (obj) {
    console.log(obj);
    appendLog("Something Went Wrong.");
  };
 
  // "Select Save File": let the user pick the output file, remember it for
  // later saves, then empty it and write the initial placeholder content.
  selectSave.addEventListener('click', function() {
    chrome.fileSystem.chooseEntry(
      {type: 'saveFile', suggestedName: 'output.csv'},
      function(writableFileEntry) {
        // The callback receives no entry when the dialog is cancelled.
        if (!writableFileEntry) {
          appendLog("No Save File Selected");
          return;
        }
        chosenFileEntry=writableFileEntry;

        writableFileEntry.createWriter(function(writer) {
          writer.onerror = errorHandler;
          // truncate() completes asynchronously and a FileWriter rejects a
          // write() issued while another operation is pending, so the write
          // is started only once the truncate has ended.
          writer.onwriteend = function() {
            writer.onwriteend = null; // do not re-run after the write itself ends
            writer.write(new Blob(['0123456789'],
              {type: 'text/plain'}
            ));
          };
          writer.truncate(0);
        }, errorHandler);
      });
  });
 
 
    /**
     * Render one value as a CSV field. Values containing a quote, comma or
     * line break are wrapped in double quotes with inner quotes doubled;
     * all other values are emitted unchanged.
     */
    function csvField(value) {
      var text = String(value);
      if (/[",\r\n]/.test(text)) {
        return '"' + text.replace(/"/g, '""') + '"';
      }
      return text;
    }

    /**
     * Convert an array of row objects into CSV text: one line per row, fields
     * in the row's own key order, each line terminated by CRLF.
     */
    function rowsToCsv(rows) {
      var csv = '';
      for (var i = 0; i < rows.length; i++) {
        var fields = [];
        for (var key in rows[i]) {
          fields.push(csvField(rows[i][key]));
        }
        // join() leaves no trailing comma at the end of the line.
        csv += fields.join(',') + '\r\n';
      }
      return csv;
    }

    // Handle 'save' requests from other extensions: append the received rows
    // to the chosen file (starting over when myIndex is 1). Problems that are
    // detected immediately are answered through sendResponse; the outcome of
    // the asynchronous file write is reported through sendReply instead.
    chrome.runtime.onMessageExternal.addListener(
      function(request, sender, sendResponse) {
        if (!request.myAction) {
          sendResponse({"result":"Ops, I don't understand this message :" + JSON.stringify(request),"Success":"false"});
          return;
        }

        appendLog("Action from "+sender.id+": "+request.myAction);

        if (request.myAction!='save'){
          return; // other actions are logged only
        }

        if (chosenFileEntry==null){
          sendResponse({"result":"No Save File Configured","Success":"false"});
          return;
        }

        // The payload normally arrives as a JSON string; accept an already parsed array too.
        var arrData;
        try {
          arrData = typeof request.myCustomMessage == 'string' ? JSON.parse(request.myCustomMessage) : request.myCustomMessage;
        } catch (parseError) {
          sendResponse({"result":"Invalid Data Received","Success":"false"});
          return;
        }
        if (!Array.isArray(arrData)) {
          sendResponse({"result":"Invalid Data Received","Success":"false"});
          return;
        }

        var pageCsv = rowsToCsv(arrData);

        // Any failure after this point is reported to the sender so that it
        // is not left waiting for a save report that never comes.
        var reportFailure = function(err) {
          console.log(err);
          appendLog("Save Failed");
          sendReply("false","Save Failed",request.myIndex);
        };

        appendLog("Saving CSV : ");

        chosenFileEntry.file(function(file) {
          var reader = new FileReader();
          reader.onerror = reportFailure;

          reader.onload = function(e) {
            var contents =e.target.result;

            // The first page starts a new result set, so earlier content is dropped.
            if (request.myIndex==1){
              contents='';
              appendLog("Clearing Contents");
            }

            appendLog("Saving Index : " + request.myIndex);
            var fullCsv = contents + pageCsv;

            chrome.fileSystem.getWritableEntry(chosenFileEntry,
              function(writableFileEntry) {
                writableFileEntry.createWriter(function(writer) {
                  writer.onerror = reportFailure;
                  // Empty the file before writing so that content shorter than
                  // the existing file leaves no stale bytes behind; the write
                  // starts once the truncate has ended.
                  writer.onwriteend = function() {
                    writer.onwriteend = function() {
                      appendLog("Save Complete - Sending Message");
                      sendReply("true","Ok Saved :",request.myIndex);
                    };
                    writer.write(new Blob([fullCsv],
                      {type: 'text/plain'}));
                  };
                  writer.truncate(0);
                }, reportFailure);
              });
          };

          reader.readAsText(file);
        }, reportFailure);
      });
 
        /**
         * Send a status message to the extension whose ID is in the
         * Destination ID field and log whatever it answers.
         *
         * @param myResult       value sent as `result`
         * @param myResultAction value sent as `myResultAction`
         * @param pageIndex      value sent as `myResultIndex`
         */
        function sendReply(myResult,myResultAction,pageIndex){
          var payload = {result: myResult, myResultAction : myResultAction, myResultIndex : pageIndex};

          var logResponse = function(response) {
            var line = "response: "+JSON.stringify(response);
            console.log(line);
            appendLog(line);
          };

          chrome.runtime.sendMessage(sendId.value, payload, logResponse);
        }
 
  // Add one line of text to the end of the on-screen log panel.
  var appendLog = function(message) {
    var existing = logField.innerText;
    logField.innerText = existing + "\n" + message;
  };
 
 
  context.appendLog = appendLog;
})(window)

manifest.json

manifest.json
{
  "manifest_version": 2,
  "name": "AG Data Saver Application",
  "description": "Application to manage the save sink events from the parser.",
  "version": "1.1",
  "minimum_chrome_version": "23",
  "icons": { "16": "icon_16.png" },
  "app": {
    "background": { "scripts": ["main.js"] }
  },
  "permissions": [
    { "fileSystem": ["write", "retainEntries", "directory"] },
    "storage"
  ]
}
web_browsers/chrome_screen_scraper/chrome_screen_scraper.1468230705.txt.gz · Last modified: 2020/07/15 09:30 (external edit)

Donate Powered by PHP Valid HTML5 Valid CSS Driven by DokuWiki