The template in this sample is for JS2, but can be used for any JS step from 1 through to 7.
This template also includes code for debugging in Microsoft Visual Studio.
//Language Studio Pre-Processing Script
//Copyright 2008-2016 Omniscien Technologies Pte Ltd. All Rights Reserved.
//NOTE: This script is designed to be debugged in Microsoft Visual Studio.
//      To run without the Microsoft JavaScript debugger type the following command line:
//          cscript default.pre.3.js
//
//      To run with the Microsoft JavaScript debugger type the following command line:
//          cscript default.pre.3.js //D //X
//
//      No changes are necessary to run in production, simply upload and the code will
//      autodetect that it is in the Language Studio production environment.
//IMPORTANT: There are very minor differences in RegEx between Microsoft and Oracle Java implementations.
//
//Description:
//Template - Below are the typical tasks performed in JS2
//1. Document level normalization of encoding and content formats (i.e. csv, html, ANSI in UTF-8, etc.)
//2. Basic repairs of things that would possibly cause issues in translation quality
//   (NOTE: may be better to do most in JS3 if they do not impact TU Extract)
//3. Apply any special cases for splitting segments. (e.g. rules to handle exceptions in splitting segments)
//4. Mark content that should not be translated which will then be excluded from the TU extract step.
//
var iJSNo = 2; //The number of the JS Script
var sSourceLang = "en"; //SET YOUR SOURCE LANGUAGE
var sTargetLang = "ja"; //SET YOUR TARGET LANGUAGE

//DO NOT CHANGE
var isDebug = 0;
var isExternalTest = 0;
var logText = "";
var sLangPair = "";

//Detects if we are running in the Language Studio Enterprise runtime or development environment
var bIsDev = ((typeof AOSession) == "undefined");
//The file based test harness needs Windows Script Host; only use it when WScript really exists
var bHasWScript = ((typeof WScript) != "undefined");
var sTestInFilePath;
var sTestOutFilePath;
if (bIsDev) {
    isDebug = 1;
    isExternalTest = 1;
    if (bHasWScript) {
        //Set the paths to load the input files from for testing in Microsoft Visual Studio debugger.
        sTestInFilePath = currentPath() + "\\In" + iJSNo + "." + sSourceLang + ".txt";
        sTestOutFilePath = currentPath() + "\\Out" + iJSNo + "." + sSourceLang + ".txt";
    }
}

//USER VARIABLES
//Declare your global variables here

//Add prototypes if needed
if (!String.prototype.trim) {
    //Polyfill for script hosts that do not provide String.prototype.trim
    String.prototype.trim = function () {
        return this.replace(/^\s+|\s+$/g, '');
    };
}

//Entry point: runs every processing stage over the source text.
//  sAllSourceSegments - all source segments as one string, one segment per "\n" separated line
//  Returns the processed text; when any log text was collected it is appended inside a
//  <notran> block that starts with the AOSPLITLOGAO marker.
function main(sAllSourceSegments) {
    //Initialize
    log("main (start)");
    initializeLang();
    setScriptNo();

    //Process all the segments as 1 block of text
    sAllSourceSegments = processAllSourceSegments(sAllSourceSegments);

    //Process each segment line one at a time - MUCH SLOWER - TRY TO AVOID UNLESS ABSOLUTELY NECESSARY
    sAllSourceSegments = processEachSegment(sAllSourceSegments);

    //Cleanup and make sure none of the formatting has introduced avoidable errors.
    sAllSourceSegments = processFinalCleanup(sAllSourceSegments);

    log("main (exit)");
    if (logText.length > 0) {
        sAllSourceSegments = sAllSourceSegments + "\r\n<notran>AOSPLITLOGAO\r\n" + logText + "</notran>";
    }

    // Return output
    return sAllSourceSegments;
}

//Process the entire source as 1 block of data - faster than single Segment
function processAllSourceSegments(sAllSourceSegments) {
    log("processAllSourceSegments (start)");

    //YOUR CODE HERE

    log("processAllSourceSegments (exit)");
    return sAllSourceSegments;
}

//Processes a single segment - slower as it loops through each segment
function processSegment(sSegment) {
    //YOUR CODE HERE
    return sSegment;
}

//-----------------------------------------------------------------
//Standard functions do not modify below this point
//-----------------------------------------------------------------

//Splits the text on "\n", passes every line through processSegment and joins the result again.
function processEachSegment(sAllSourceSegments) {
    //NOTE: If at all possible, put your code in processAllSourceSegments
    //Line break must be \n not \r\n
    var aSourceSegments = sAllSourceSegments.split("\n");
    //Index loop: for-in would also visit any enumerable property added to Array.prototype
    for (var i = 0; i < aSourceSegments.length; i++) {
        aSourceSegments[i] = processSegment(aSourceSegments[i]);
    }
    return aSourceSegments.join("\n");
}

//Removes a leading UTF-8 byte order mark and returns the rest of the string unchanged.
//Handles both the raw byte form (EF BB BF read as single-byte characters) and the decoded U+FEFF form.
function removeBOM(sIn) {
    var sByteBOM = "\xef\xbb\xbf";
    if (sIn.indexOf(sByteBOM) == 0) {
        //substring takes an END INDEX, not a length: only drop the prefix, keep the tail intact
        return sIn.substring(sByteBOM.length);
    }
    if (sIn.charAt(0) == "\uFEFF") {
        return sIn.substring(1);
    }
    return sIn;
}

//Counts how many times sCount occurs in sIn.
function stringCount(sIn, sCount) {
    return sIn.split(sCount).length - 1;
}

//Returns input converted to upper case.
function Upper(input) {
    var sOut = input.toUpperCase();
    return sOut;
}

//Returns input converted to lower case.
function Lower(input) {
    var sOut = input.toLowerCase();
    return sOut;
}

//Lower-cases the configured language codes and builds sLangPair as "source-target".
function initializeLang() {
    sSourceLang = sSourceLang.toLowerCase();
    sTargetLang = sTargetLang.toLowerCase();
    sLangPair = sSourceLang + '-' + sTargetLang;
}

//Final pass over the complete text: restores the AO...AO placeholder markers (skipped while
//debugging so they stay visible), normalizes <notran> wrapped +/- signs, converts the markup
//to the internal format for scripts after JS2, or marks ##TESTCASE lines as not translatable
//when running as an external test. Returns the cleaned text.
function processFinalCleanup(sAllSourceSegments) {
    log("processFinalCleanup (start)");

    //NOTE: merging of adjacent "</notran><notran>" pairs is disabled in this template.

    if (!isDebug) {
        //Cleanup markers
        sAllSourceSegments = sAllSourceSegments.replace(/AOFORWARDSLASHAO/gmi, '/');
        sAllSourceSegments = sAllSourceSegments.replace(/AOBACKSLASHAO/gmi, '\\');
        sAllSourceSegments = sAllSourceSegments.replace(/AOXAO/gmi, 'x');
        sAllSourceSegments = sAllSourceSegments.replace(/AOATAO/gmi, '@');
        sAllSourceSegments = sAllSourceSegments.replace(/AOATDASHATAO/gmi, '@-@');
        sAllSourceSegments = sAllSourceSegments.replace(/AODASHAO/gmi, '-');
        sAllSourceSegments = sAllSourceSegments.replace(/AOPLUSAO/gmi, '+');
        sAllSourceSegments = sAllSourceSegments.replace(/AOQUOTEAO|AODOUBLEQUOTEAO/gmi, '"');
        sAllSourceSegments = sAllSourceSegments.replace(/AOSINGLEQUOTEAO/gmi, "'");
    }

    //A lone + or - inside <notran> always gets exactly one space on each side
    sAllSourceSegments = sAllSourceSegments.replace(/(<notran>[ ]{0,}(\-|\+)[ ]{0,}<\/notran>)/gmi, "<notran> $2 </notran>");

    if (!isExternalTest) {
        log("processFinalCleanup (Internal adjustments)");
        //Change the content to internal formats if past JS2
        if (iJSNo > 2) {
            //Control XML
            //NOTE(review): as written these three replacements map each tag to itself; presumably
            //the patterns were meant to match an escaped form of the tags - confirm against the
            //original template before relying on them.
            sAllSourceSegments = sAllSourceSegments.replace(/<wall\/>/g, "<wall/>");
            sAllSourceSegments = sAllSourceSegments.replace(/<\/zone>/g, "</zone>");
            sAllSourceSegments = sAllSourceSegments.replace(/<zone>/g, "<zone>");

            sAllSourceSegments = sAllSourceSegments.replace(/<notrani>(.*?)<\/notrani>/g, "<aotran type=\"notran\" translation=\"$1\">aonotranao</aotran>");
            sAllSourceSegments = sAllSourceSegments.replace(/<notran>(.*?)<\/notran>/g, "<aotran type=\"notran\" translation=\"$1\">aonotranao</aotran>");
            //Second pass kept on purpose: it can still match leftovers of nested <notran> tags
            sAllSourceSegments = sAllSourceSegments.replace(/<notran>(.*?)<\/notran>/g, "<aotran type=\"notran\" translation=\"$1\">aonotranao</aotran>");

            //Multiple White Space
            sAllSourceSegments = sAllSourceSegments.replace(/[ ]{2,}/g, " ");
        }
    } else {
        //Check for test case markers and mark them to not be translated
        sAllSourceSegments = sAllSourceSegments.replace(/(##TESTCASE)([^\r\n]*)/g, "<notran>$1$2</notran>");
    }

    //BOM safety check
    sAllSourceSegments = removeBOM(sAllSourceSegments);

    log("processFinalCleanup (exit)");
    return sAllSourceSegments;
}

//Appends a time stamped line to logText; does nothing unless isDebug is 1.
function log(sText) {
    if (isDebug == 1) {
        var now = new Date();
        logText += "\r\n" + formatLogDate(now) + "\t" + sText;
    }
}

//Formats a Date as "yyyy-MM-dd HH:mm:ss.SSS" using the local time fields of dDate.
function formatLogDate(dDate) {
    //zero-pad a single zero if needed
    var zp = function (val) {
        return (val <= 9 ? '0' + val : '' + val);
    };

    //zero-pad up to two zeroes if needed
    var zp2 = function (val) {
        return val <= 99 ? (val <= 9 ? '00' + val : '0' + val) : ('' + val);
    };

    var d = dDate.getDate();
    var m = dDate.getMonth() + 1; //getMonth is zero based
    var y = dDate.getFullYear();
    var h = dDate.getHours();
    var min = dDate.getMinutes();
    var s = dDate.getSeconds();
    var ms = dDate.getMilliseconds();
    return '' + y + '-' + zp(m) + '-' + zp(d) + ' ' + zp(h) + ':' + zp(min) + ':' + zp(s) + '.' + zp2(ms);
}

//Determines iJSNo. In development the number is taken from the last run of digits in the
//script file name (e.g. default.pre.3.js gives 3); in production the session value "ScriptNo"
//is used, but only when iJSNo has not been configured (is 0). When no number can be
//determined the configured iJSNo is kept.
function setScriptNo() {
    var iParsed;
    if (bIsDev) {
        //Autodetect the script number from the name (only possible under Windows Script Host)
        if ((typeof WScript) != "undefined") {
            var aMatch = WScript.ScriptName.toString().match(/(\d+)\D*$/);
            if (aMatch) {
                iJSNo = parseInt(aMatch[1], 10);
            }
        }
    } else {
        //Comparison, not assignment: "iJSNo = 0" would always reset the configured number
        if (iJSNo == 0) {
            iParsed = parseInt(AOSession.getValue("ScriptNo"), 10);
            if (!isNaN(iParsed)) {
                iJSNo = iParsed;
            }
        }
    }
}

//WINDOWS TEST FUNCTIONS - For use with WScript
if (isExternalTest && bHasWScript) ExternalTest();

//Runs main() against the test input file and writes the result to the test output file.
function ExternalTest() {
    if (isDebug) WScript.Echo('--Start Test--');

    //Read input from a file
    var sAllSourceSegments = fileToString(sTestInFilePath);

    //Normalize CRLF to \n
    sAllSourceSegments = sAllSourceSegments.replace(/\r\n|\r/g, "\n");

    //Execute test as a normal pre-processing step would execute it.
    var sOut = main(sAllSourceSegments);

    //Write the output to a file
    stringToFile(sTestOutFilePath, sOut);

    if (isDebug) {
        if (logText.length > 0) {
            WScript.Echo(logText);
        }
        WScript.Echo('--End Test--');
    }
}

//Returns the current working directory of the script host.
function currentPath() {
    return new ActiveXObject("WScript.Shell").CurrentDirectory;
}

//Load the UTF-8 file into a string (a leading BOM is removed)
function fileToString(sFilePath) {
    //Relies on Microsoft ActiveX Data Objects (ADO) which must be installed
    var oStream = new ActiveXObject("ADODB.Stream");
    oStream.CharSet = "utf-8";
    oStream.Open();
    try {
        oStream.LoadFromFile(sFilePath);
        return removeBOM(oStream.ReadText());
    } finally {
        //Always release the stream, even when loading or reading fails
        oStream.Close();
    }
}

//Write a string out to a UTF-8 file, overwriting an existing file
function stringToFile(sFilePath, sData) {
    //Relies on Microsoft ActiveX Data Objects (ADO) which must be installed
    var oStream = new ActiveXObject("ADODB.Stream");
    oStream.CharSet = "utf-8";
    oStream.Open();
    try {
        oStream.WriteText(sData);
        oStream.SaveToFile(sFilePath, 2); //2 = create the file or overwrite an existing one
        oStream.Flush();
    } finally {
        //Always release the stream, even when writing fails
        oStream.Close();
    }
}