1

I have written a Spark Java UDF that generates a random number according to my requirements. The code below returns a new value every time I run it in Eclipse, but when I call this Java UDF from Spark SQL it returns the same value every time.

Registering the UDF in Spark: spark.udf.registerJavaFunction("getGeneratedRand","com.test.RandNumGenerator",StringType())

UDF code for random number generation:

   import java.util.Calendar;
    import java.util.Random;
    import org.apache.spark.api.java.*;
    import org.apache.spark.SparkConf;
    import org.apache.spark.sql.*;
    import org.apache.spark.sql.api.java.UDF0;
    import org.apache.spark.sql.api.java.UDF1;
    import org.apache.spark.sql.types.DataTypes;
    import org.apache.commons.lang3.RandomStringUtils;

    /**
     * Zero-argument Spark SQL UDF that builds identifiers of the form
     * {@code NEW_<12 digits><ss><SSS>}: twelve pseudo-random digits drawn from
     * {@link #characters}, followed by the zero-padded second (2 digits) and
     * millisecond (3 digits) of the current wall-clock time.
     *
     * <p>All state is static, so every instance in a JVM shares one lazily created
     * {@link Random}; each JVM that loads this class creates and seeds its own.
     *
     * <p>NOTE(review): Spark assumes a UDF is deterministic unless told otherwise, so a
     * call with no arguments can presumably be evaluated once and the result reused for
     * every row and column. That would explain identical values in query output even
     * though repeated direct calls to {@link #call()} differ. If so, the fix belongs at
     * registration time (e.g. wrap with {@code functions.udf(...).asNondeterministic()}
     * where the Spark version offers it), not in this class -- confirm against the
     * Spark version in use.
     */
    public class RandNumGenerator implements UDF0<String>
    {
        /** Literal prefix of every generated identifier. */
        private static final String PREFIX = "NEW_";

        /** Number of random digits placed after the prefix. */
        private static final int RANDOM_DIGIT_COUNT = 12;

        /** Alphabet for the random part; note that '0' is not included. */
        static char[] characters = {'1', '2', '3', '4', '5', '6', '7', '8', '9'};

        /** Seed used for {@link #random}; {@code null} until the generator is first created. */
        static Long randomSeed = null;

        /**
         * Shared generator, created on first use. Declared {@code volatile} so the
         * unsynchronized first check in {@link #getRandom()} can never observe a
         * partially published object.
         */
        static volatile Random random = null;

        /**
         * Returns the JVM-wide generator, creating and seeding it on first use.
         *
         * @return the shared {@link Random}, never {@code null}
         */
        private Random getRandom() {
            Random current = random;
            if (current == null) {
                synchronized (RandNumGenerator.class) {
                    current = random;
                    if (current == null) {
                        // Mix in nanoTime so two JVMs that initialise within the same
                        // millisecond do not end up with identical sequences.
                        randomSeed = System.currentTimeMillis() ^ System.nanoTime();
                        current = new Random(randomSeed);
                        random = current;
                    }
                }
            }
            return current;
        }

        /**
         * Generates one identifier.
         *
         * @return {@code "NEW_"} + 12 random digits + 2-digit second + 3-digit millisecond
         * @throws Exception declared by the {@code UDF0} contract; not thrown explicitly here
         */
        @Override
        public String call() throws Exception {
            String randomDigits = RandomStringUtils.random(
                    RANDOM_DIGIT_COUNT, 0, 0, false, true, characters, getRandom());

            Calendar now = Calendar.getInstance();
            String second = zeroPad(now.get(Calendar.SECOND), 2);
            String millisecond = zeroPad(now.get(Calendar.MILLISECOND), 3);

            return PREFIX + randomDigits + second + millisecond;
        }

        /**
         * Left-pads the decimal form of {@code value} with '0' up to {@code width} characters.
         * Uses {@link Integer#toString(int)}, so the digits are always ASCII regardless of locale.
         *
         * @param value non-negative number to render
         * @param width minimum length of the result
         * @return the padded decimal string
         */
        private static String zeroPad(int value, int width) {
            StringBuilder padded = new StringBuilder(Integer.toString(value));
            while (padded.length() < width) {
                padded.insert(0, '0');
            }
            return padded.toString();
        }

        /**
         * Manual smoke test: calls the UDF twice and prints the second result.
         *
         * @param args unused
         * @throws Exception propagated from {@link #call()}
         */
        public static void main(String[] args) throws Exception {
            RandNumGenerator generator = new RandNumGenerator();
            generator.call(); // first value is intentionally discarded, as before
            String result = generator.call();
            System.out.print(result);
        }
    }

Calling the UDF in Spark SQL:

spark.sql("select getGeneratedRand(),getGeneratedRand() from db.test_tbl").show(20,False)

Result:

+---------------------------------+---------------------------------+
|UDF:getGeneratedRand()           |UDF:getGeneratedRand()           |
+---------------------------------+---------------------------------+
|NEW_26481847455148826            |NEW_26481847455148826            |
|NEW_26481847455148826            |NEW_26481847455148826            |
|NEW_26481847455148826            |NEW_26481847455148826            |
|NEW_26481847455148826            |NEW_26481847455148826            |
|NEW_26481847455148826            |NEW_26481847455148826            |
|NEW_26481847455148826            |NEW_26481847455148826            |
|NEW_26481847455148826            |NEW_26481847455148826            |
marjun
  • 696
  • 5
  • 17
  • 30
  • 1
    I have resolved this issue by passing a PK column to the UDF, so that a random number is generated for every row. – marjun Jan 03 '19 at 08:39

0 Answers0