खोज…


यूडीटीएफ उदाहरण और उपयोग

उपयोगकर्ता परिभाषित तालिका फ़ंक्शन (UDTF) को org.apache.hadoop.hive.ql.udf.generic.GenericUDTF एब्सट्रैक्ट क्लास को एक्सटेंड करके बनाया जाता है। यह फ़ंक्शन एक इनपुट पंक्ति के लिए कई पंक्तियों और कई स्तंभों को आउटपुट करने की अनुमति देता है।

हमें नीचे दी गई विधियों को ओवरराइड करना होगा:

 1. initialize: here we specify the input and output parameters.
abstract StructObjectInspector initialize(ObjectInspector[] args) 
                                                throws UDFArgumentException; 

 2. process: here we process an input record and write out any resulting records.
abstract void process(Object[] record) throws HiveException;

 3. close: called to notify the UDTF that there are no more rows to process.
    Clean-up code or additional output can be produced here.
abstract void close() throws HiveException;




import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

/**
 * Hive user-defined table function (UDTF) that splits a full-name string into
 * {@code (name, surname)} rows.
 *
 * <p>Supported input shapes (tokens separated by whitespace):
 * <ul>
 *   <li>{@code "First Last"} emits one row {@code (First, Last)};</li>
 *   <li>{@code "First1 and First2 Last"} emits two rows,
 *       {@code (First1, Last)} and {@code (First2, Last)}.</li>
 * </ul>
 * Any other input (including null, empty or whitespace-only strings) emits no rows.
 */
public class NameParserGenericUDTF extends GenericUDTF {
  /** Inspector for the single input argument; assigned in {@link #initialize}. */
  private PrimitiveObjectInspector stringOI = null;

  /**
   * Validates the call-site arguments and declares the output schema.
   *
   * @param args object inspectors describing the arguments passed to the function
   * @return a struct inspector with two string fields, {@code name} and {@code surname}
   * @throws UDFArgumentException if the function is not called with exactly one
   *     argument of primitive category STRING
   */
  @Override
  public StructObjectInspector initialize(ObjectInspector[] args) throws UDFArgumentException {
    if (args.length != 1) {
      throw new UDFArgumentException("NameParserGenericUDTF() takes exactly one argument");
    }

    // The two checks must be joined with `||`. With `&&`, a non-primitive argument
    // fell through to the cast (ClassCastException) and a primitive non-string
    // argument was silently accepted. Short-circuiting keeps the cast safe.
    if (args[0].getCategory() != ObjectInspector.Category.PRIMITIVE
        || ((PrimitiveObjectInspector) args[0]).getPrimitiveCategory()
            != PrimitiveObjectInspector.PrimitiveCategory.STRING) {
      throw new UDFArgumentException("NameParserGenericUDTF() takes a string as a parameter");
    }

    // input
    stringOI = (PrimitiveObjectInspector) args[0];

    // output: two string columns
    List<String> fieldNames = new ArrayList<String>(2);
    List<ObjectInspector> fieldOIs = new ArrayList<ObjectInspector>(2);
    fieldNames.add("name");
    fieldNames.add("surname");
    fieldOIs.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
    fieldOIs.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
    return ObjectInspectorFactory.getStandardStructObjectInspector(fieldNames, fieldOIs);
  }

  /**
   * Parses one full-name string into zero, one or two output rows.
   *
   * @param name the raw input string; may be null
   * @return a list of {@code {name, surname}} arrays; empty when the input is
   *     null, blank, or does not match a supported shape
   */
  public ArrayList<Object[]> processInputRecord(String name) {
    ArrayList<Object[]> result = new ArrayList<Object[]>();

    // ignoring null input
    if (name == null) {
      return result;
    }

    // Trim first: String.split drops trailing empty tokens but keeps a leading
    // one, so " John Smith" would otherwise yield three tokens and be discarded
    // while "John Smith " was accepted.
    String trimmed = name.trim();
    if (trimmed.isEmpty()) {
      return result;
    }

    String[] tokens = trimmed.split("\\s+");

    if (tokens.length == 2) {
      result.add(new Object[] { tokens[0], tokens[1] });
    } else if (tokens.length == 4 && tokens[1].equals("and")) {
      // "A and B Surname": both first names share the trailing surname.
      result.add(new Object[] { tokens[0], tokens[3] });
      result.add(new Object[] { tokens[2], tokens[3] });
    }

    return result;
  }

  /**
   * Handles one input row: parses the first column and forwards every resulting
   * {@code (name, surname)} row.
   *
   * @param record the argument values for the current row; only {@code record[0]} is read
   * @throws HiveException if forwarding a row fails
   */
  @Override
  public void process(Object[] record) throws HiveException {
    final Object raw = stringOI.getPrimitiveJavaObject(record[0]);
    if (raw == null) {
      // NULL column value: emit nothing instead of failing on toString().
      return;
    }

    for (Object[] row : processInputRecord(raw.toString())) {
      forward(row);
    }
  }

  /** No buffered state to flush and no resources to release. */
  @Override
  public void close() throws HiveException {
    // do nothing
  }
}

कोड को एक जार में पैकेज करें और उस जार को हाइव सत्र में जोड़ें (ADD JAR), फिर नीचे दिखाए अनुसार फ़ंक्शन रजिस्टर करें।

hive> CREATE TEMPORARY FUNCTION process_names as 'jar.path.NameParserGenericUDTF'; 

Here we will pass input as full name and break it into first and last name.

hive> SELECT 
   t.name,
   t.surname 
FROM people 
    lateral view process_names(name) t as name, surname;

Teena Carter
John Brownewr


Modified text is an extract of the original Stack Overflow Documentation
के तहत लाइसेंस प्राप्त है CC BY-SA 3.0
से संबद्ध नहीं है Stack Overflow