dbrock
May 27, 2011, 12:00am
1
Hello,
There is a use case I am trying to prove out in cloverETL related to transposing name-value pair data. Consider the following feed with standard EAV (entity-attribute-value, or name-value pairs):
ID|NAME|VALUE
1|Color|Red
1|Color|Blue
1|Weight|5
2|Color|Blue
2|Weight|10
2|Height|2
I want to transpose this so that each unique attribute/name (e.g. Color, Weight, Height) become a field in the output metadata.
ID|Color|Weight|Height
1|Red,Blue|5|<null>
2|Blue|10|2
To accomplish this, the metadata on the output edge needs to be dynamic, as it would be impossible to predict each attribute that may be found on the input. The only solution I have encountered for dynamic metadata generation was found here , but this solution is specific to SQL, not flat files.
Is there any approach to make the output edge metadata dynamic or forgiving for a use case such as this?
Hello,
metadata can’t be changed/set during the runtime. All you can do is to analyse the data (see eg. DataProfiling/graph/AdvancedStatistic.grf from our examples ) and then prepare the metadata for the transformation itself.
This worked for me.
<?xml version="1.0" encoding="UTF-8"?>
<Graph author="Administrator" created="Wed Apr 27 15:06:53 EDT 2011" guiVersion="3.0.1" id="1306857528320" licenseCode="CLP1DENDEC30590134BY" licenseType="Commercial Pro" modified="Tue May 31 17:45:10 EDT 2011" modifiedBy="Administrator" name="DynamicDataDenormalization" revision="1.72">
<Global>
<Metadata fileURL="${META_DIR}/denormalizedOutput.fmt" id="Metadata1"/>
<Metadata fileURL="${META_DIR}/denormalizedOutput.fmt" id="Metadata2" previewAttachmentCharset="ISO-8859-1"/>
<Metadata fileURL="${META_DIR}/dynamicMetadata.fmt" id="Metadata8" previewAttachmentCharset="ISO-8859-1"/>
<Metadata fileURL="${META_DIR}/EAV.fmt" id="Metadata0"/>
<Metadata fileURL="${META_DIR}/EAV.fmt" id="Metadata3" previewAttachment="${DATAIN_DIR}/EAV.txt" previewAttachmentCharset="ISO-8859-1"/>
<Metadata fileURL="${META_DIR}/EAV.fmt" id="Metadata4" previewAttachment="${DATAIN_DIR}/EAV.txt" previewAttachmentCharset="ISO-8859-1"/>
<Metadata fileURL="${META_DIR}/test.fmt" id="Metadata5"/>
<Metadata fileURL="${META_DIR}/test.fmt" id="Metadata7" previewAttachmentCharset="ISO-8859-1"/>
<Metadata id="Metadata6" previewAttachmentCharset="ISO-8859-1">
<Record fieldDelimiter="|" name="dynamic" previewAttachmentCharset="ISO-8859-1" recordDelimiter="\r\n" type="delimited">
<Field name="Name" type="string"/>
</Record>
</Metadata>
<Property fileURL="workspace.prm" id="GraphParameter0"/>
<Note alignment="1" backgroundColorB="225" backgroundColorG="255" backgroundColorR="255" folded="false" height="171" id="Note0" textColorB="0" textColorG="0" textColorR="0" textFontSize="8" title="This phase is used to take in a tuple input file, /data-in/EAV.txt and build a metadata .fmt file dynamically using the distinct list of attributes found." titleColorB="0" titleColorG="0" titleColorR="0" titleFontSize="10" width="1051" x="1" y="44"/>
<Note alignment="1" backgroundColorB="225" backgroundColorG="255" backgroundColorR="255" folded="false" height="151" id="Note1" textColorB="0" textColorG="0" textColorR="0" textFontSize="8" title="This phase is used to take in a tuple input file, /data-in/EAV.txt and transpose it into the new dynamically generated metadata at the output edge" titleColorB="0" titleColorG="0" titleColorR="0" titleFontSize="10" width="1016" x="11" y="277"/>
<Dictionary/>
</Global>
<Phase number="0">
<Node aggregateKey="NAME" enabled="enabled" guiHeight="0" guiName="Aggregate" guiWidth="0" guiX="470" guiY="107" id="AGGREGATE0" mapping="$Name:=$NAME;" type="AGGREGATE"/>
<Node enabled="enabled" fileURL="${DATAIN_DIR}/EAV.txt" guiHeight="25" guiName="Input EAV File" guiWidth="50" guiX="20" guiY="107" id="DATA_READER1" skipFirstLine="true" type="DATA_READER"/>
<Node enabled="enabled" guiHeight="0" guiName="ExtSort" guiWidth="0" guiX="245" guiY="107" id="EXT_SORT0" sortKey="NAME(a)" type="EXT_SORT"/>
<Node enabled="enabled" fileURL="${META_DIR}/dynamicMetadata.fmt" footer="</Record>" guiHeight="25" guiName="STRUCTURE_WRITER0" guiWidth="50" guiX="695" guiY="107" header="<?xml version="1.0" encoding="UTF-8"?> <Record fieldDelimiter="|" name="generateMetadata" previewAttachmentCharset="ISO-8859-1" recordDelimiter="\\r\\n" skipSourceRows="0" type="delimited"> <Field name="ID" type="string"/> " id="STRUCTURE_WRITER0" mask="<Field name=\"$Name\" type=\"string\"/>" type="STRUCTURE_WRITER"/>
<Edge fromNode="AGGREGATE0:0" guiBendpoints="" guiRouter="Manhattan" id="Edge6" inPort="Port 0 (Body port)" metadata="Metadata6" outPort="Port 0 (out)" toNode="STRUCTURE_WRITER0:0"/>
<Edge fromNode="DATA_READER1:0" guiBendpoints="" guiRouter="Manhattan" id="Edge7" inPort="Port 0 (in)" metadata="Metadata3" outPort="Port 0 (output)" toNode="EXT_SORT0:0"/>
<Edge fromNode="EXT_SORT0:0" guiBendpoints="" guiRouter="Manhattan" id="Edge8" inPort="Port 0 (in)" metadata="Metadata4" outPort="Port 0 (out)" toNode="AGGREGATE0:0"/>
</Phase>
<Phase number="1">
<Node enabled="enabled" fileURL="${DATAIN_DIR}/EAV.txt" guiHeight="0" guiName="UniversalDataReader" guiWidth="0" guiX="37" guiY="312" id="DATA_READER0" skipFirstLine="true" type="DATA_READER"/>
<Node enabled="enabled" fileURL="${DATAOUT_DIR}\dynamicDataDenormalizer.out" guiHeight="0" guiName="UniversalDataWriter" guiWidth="0" guiX="487" guiY="312" id="DATA_WRITER0" outputFieldNames="true" type="DATA_WRITER"/>
<Node enabled="enabled" guiHeight="0" guiName="Transpose" guiWidth="0" guiX="262" guiY="312" id="DENORMALIZER0" key="ID" order="Ignore" type="DENORMALIZER">
<attr name="denormalize"><![CDATA[import java.util.Properties;
import org.jetel.component.denormalize.DataRecordDenormalize;
import org.jetel.data.DataRecord;
import org.jetel.exception.ComponentNotReadyException;
import org.jetel.exception.TransformException;
import org.jetel.metadata.DataRecordMetadata;
import java.util.HashMap;
import java.util.Map;
import java.util.Iterator;
import java.util.ArrayList;
public class MyDynamicDenormalize extends DataRecordDenormalize {
private HashMap outputFieldMap;
private String id;
@Override
public boolean init(Properties parameters,
DataRecordMetadata sourceMetadata, DataRecordMetadata targetMetadata)
throws ComponentNotReadyException {
outputFieldMap = new HashMap();
for (int i = 0; i < targetMetadata.getNumFields(); i++) {
System.err.println("TargetMetadata Name: " + targetMetadata.getField(i).getName());
if (!targetMetadata.getField(i).getName().equals("ID")) {
outputFieldMap.put(targetMetadata.getField(i).getName(), new ArrayList<String>());
}
}
return super.init(parameters, sourceMetadata, targetMetadata);
}
//Loop through each incoming tuple and add each value to the map
@Override
public int append(DataRecord inputRecord) throws TransformException {
id = inputRecord.getField(0).toString(); // position 0 = ID
String tupleName = inputRecord.getField(1).toString(); // position 1 = NAME
String tupleValue = inputRecord.getField(2).toString(); // position 2 = VALUE
if (outputFieldMap.containsKey(tupleName)) {
ArrayList<String> valueList = (ArrayList<String>)outputFieldMap.get(tupleName);
if (!valueList.contains(tupleValue)) {
valueList.add(tupleValue);
}
outputFieldMap.put(tupleName, valueList);
}
return 0;
}
@Override
public int transform(DataRecord outputRecord) throws TransformException {
for (int i = 0; i < outputRecord.getNumFields(); i++) {
String outputFieldName = outputRecord.getField(i).getMetadata().getName();
if (!outputRecord.getField(i).getMetadata().getName().equals("ID")) {
StringBuilder outputValue = new StringBuilder();
if (outputFieldMap.containsKey(outputFieldName)) {
ArrayList<String> valueList = (ArrayList<String>)outputFieldMap.get(outputFieldName);
for (String value : valueList) {
outputValue = outputValue.append(value).append("~");
}
}
if (outputValue.length() > 0) {
outputRecord.getField(i).setValue(outputValue.subSequence(0, outputValue.length() - 1));
}
}
else{
outputRecord.getField(i).setValue(id);
}
}
return 0;
}
@Override
public void clean() {
super.clean();
Iterator it = outputFieldMap.entrySet().iterator();
while (it.hasNext()) {
Map.Entry pairs = (Map.Entry)it.next();
outputFieldMap.put(pairs.getKey(), new ArrayList<String>());
}
}
}
]]></attr>
</Node>
<Edge debugMode="true" fromNode="DATA_READER0:0" guiBendpoints="" guiRouter="Manhattan" id="Edge0" inPort="Port 0 (in)" metadata="Metadata0" outPort="Port 0 (output)" toNode="DENORMALIZER0:0"/>
<Edge fromNode="DENORMALIZER0:0" guiBendpoints="" guiRouter="Manhattan" id="Edge2" inPort="Port 0 (in)" metadata="Metadata8" outPort="Port 0 (out)" toNode="DATA_WRITER0:0"/>
</Phase>
</Graph>
CloverETL 3.1 implements two new components - Pivot and MetaPivot which do the transposition as from tuples to name-value pairs and back. These two components will, however, be available in commercial version only.
Hello,
the idea is good, but it doesn’t work in general. The graph needs to be split to 2 graphs, as metadata can’t be changed in the runtime. You can find a proper solution in the attachment.