1. Se adresează unei comunități largi de dezvoltatori 2. Este o interfață standard (standardele JSR 73, JSR 247 - JCP) 3. Crearea unui standard acceptabil de către potențiali furnizori (vendors) și consumatori. Exp: JSR 73, 247 Oracle 4. Extensibilitate 5. Implementarea unor algoritmi de bază dar cu largă aplicabilitate și posibilități de individualizare: "start small and grow in functionality" 6. Simplifică problematica pentru nespecialiști 7. Prezintă limitările de implementare pentru potențialii furnizori 8. Susține cerințele aplicațiilor industriale, reale 9. Face apel la furnizori pentru modificări de standard și extindere 10. Ține cont de alte standarde de data mining JDM API Pachetul de bază: javax.datamining Elementul central: DME (Data Mining Engine) Conectarea la DME public interface Connection; // asigură comunicarea cu DME, metode pentru crearea obiectelor Factories și executarea proceselor de mining public interface ConnectionFactory extends Factory; // utilizat pentru a crea conexiuni DME prin una din metodele getConnection public interface ConnectionSpec; // specifică informații de locație și autentificare Interfața ConnectionFactory Are o implementare specifică ce ține de modul în care se creează o conexiune Obiectele ConnectionFactory implementează interfețele: javax.naming.Referenceable, java.io.Serializable prin JNDI Implementarea Oracle este realizată prin clasa OraConnectionFactory public class OraConnectionFactory extends java.lang.Object implements javax.datamining.resource.ConnectionFactory, java.io.Serializable, javax.naming.Referenceable javax.datamining.resource.ConnectionFactory connFactory = new oracle.dmt.jdm.resource.OraConnectionFactory(); Metode ConnectionFactory: Connection getConnection() throws JDMException ; // creează o conexiune la DME Connection getConnection(ConnectionSpec spec) throws JDMException ; // creează o conexiune la DME cu informațiile de localizare și autentificare în obiectul spec ConnectionSpec getConnectionSpec(); // creează un obiect ConnectionSpec gol, 
care urmeaza a fi parametrizat Metode ConnectionSpec java.lang.String getName() void setName(java.lang.String userName) java.lang.String getURI() void setURI(java.lang.String uri) void setPassword(java.lang.String password) void setLocale(java.util.Locale locale) java.util.Locale getLocale() ConnectionFactory cFact=new OraConnectionFactory(); ConnectionSpec specificatii=cFact.getConnectionSpec(); String uri=conectare.getServer()+":1521:numeInstanta"; String utz="utilizator"; String parola="parola"; specificatii.setURI("jdbc:oracle:thin:@"+uri); specificatii.setName(utz); specificatii.setPassword(parola); Connection c=cFact.getConnection(specificatii); Definirea obiectelor public interface MiningObject; // Interfata generica pentru definirea oricarui tip de obiect data mining. Pentru obiecte de un anumit tip exista interfete derivate. Aceste obiecte sunt accesibile via Connection Metode: java.lang.String getDescription(); void setDescription(java.lang.String description) throws JDMException; java.lang.String getName(); java.lang.String getObjectIdentifier(); NamedObject getObjectType(); Tipuri de obiecte public class NamedObject extends Enum; // lista cu tipurile de obiecte gestionate de DME Exemplu: public static final NamedObject model; Metode: public static NamedObject valueOf(java.lang.String name) throws JDMException; // Intoarce o referinta NamedObject pentru numele specificat public static NamedObject[] values(); //Intoarce o lista a tuturor tipurilor definite public java.lang.String name(); Functii si algoritmi public class MiningFunction extends Enum; public class MiningAlgorithm extends Enum; Functii JDM: association,attributeImportance,regression,clustering,classification Algoritmi JDM: feedForwardNeuralNet,kMeans,naiveBayes,decisionTree,svmRegression,svmClassification Functii implementate in ODM: featureExtraction,association,regression,clustering,attributeImportance, classification Algoritmi implementati in ODM: 
generalizedLinearModel,minimumDescriptionLength,svmClassification, decisionTree,naiveBayes,aprioriAssociationRules,oCluster,nonNegativeMatrixFactorization,kMeans,feedForwardNeuralNet,svmRegression,adaptiveBayesNetwork Crearea obiectelor Factory prin conexiunea la DME public interface Factory; // interfata radacina pentru obiectele Factory Crearea obiectelor Factory prin obiectele Connection: Factory getFactory(java.lang.String objectName) throws JDMException ; // creaza un obiect Factory din clasa specificata Exemple: pdsFact = (PhysicalDataSetFactory) c.getFactory("javax.datamining.data.PhysicalDataSet"); paFact=(PhysicalAttributeFactory) c.getFactory("javax.datamining.data.PhysicalAttribute"); clustFact = (ClusteringSettingsFactory) c.getFactory("javax.datamining.clustering.ClusteringSettings"); ... Taskuri public interface Task extends MiningObject; // Obiecte speciale care executa procesele de mining. Sunt lansate prin metode execute() ale obiectului Connection Metode: getExecutionHandle ExecutionHandle getExecutionHandle() throws JDMException; // da informatii despre starea executiei - in desfasurare sau terminata. Informatiile sunt furnizate prin obiectul ExecutionHandle: java.util.Date getStartTime() throws JDMException; java.lang.Integer getDurationInSeconds() throws JDMException; Crearea taskurilor public interface BuildTaskFactory extends Factory; // Creaza taskuri pentru modelele de mining Metodele de tip create() sunt cele care creaza efectiv taskul. Exemplu: BuildTask create(java.lang.String buildData, java.lang.String buildSettingsName, java.lang.String modelName) throws JDMException; // numele modelului fizic de date, numele setarilor modelului, numele modelului Mecanismul ramane valabil si pentru alte tipuri de taskuri, cu diferente la metodele create (vezi documentatia API). 
Managementul obiectelor DM prin obiecte Connection void saveObject(java.lang.String name, MiningObject object, boolean replace) throws JDMException; // salveaza un obiect DM cu numele specificat. replace arata daca un obiect existent cu acelasi nume va fi inlocuit void setDescription(java.lang.String objectName, NamedObject objectType, java.lang.String description) throws JDMException; // specifica o descriere pentru obiectul objectName de tipul objectType java.lang.String getDescription(java.lang.String objectName, NamedObject objectType) throws JDMException void removeObject(java.lang.String name, NamedObject objectType) throws JDMException ; void renameObject(java.lang.String oldName, java.lang.String newName, NamedObject objectType) throws JDMException MiningObject retrieveObject(java.lang.String objectIdentifier) throws JDMException boolean doesObjectExist(java.lang.String objectName, NamedObject objectType) throws JDMException java.util.Collection retrieveObjects(java.util.Date createdAfter, java.util.Date createdBefore, NamedObject objectType) throws JDMException Metode de executare a taskurilor ExecutionStatus execute(Task task, java.lang.Long timeout) throws JDMException; // executarea sincrona a unui task si intorcerea unui obiect ExecutionStatus dupa executare sau la expirarea timpului ExecutionHandle execute(java.lang.String taskName, ExecutionHandle handleDependency) throws JDMException; // executare asincrona a unui task cu urmarirea executiei taskului Metode de verificare a capabilitatilor MiningFunction[] getSupportedFunctions() throws JDMException ; // intoarce lista functiilor DM suportate de conexiunea DME MiningAlgorithm[] getSupportedAlgorithms(MiningFunction function) throws JDMException ; // idem algoritmi DM NamedObject[] getNamedObjects(PersistenceOption persistenceOption) throws JDMException ; // lista obiectelor care suporta optiunea persistenceOption mentionata boolean supportsCapability(NamedObject object, PersistenceOption persistence) 
throws JDMException ; // intoarce true daca obiectul specificat suporta optiunea persistence boolean supportsCapability(MiningFunction function, MiningAlgorithm algorithm, MiningTask taskType) throws JDMException; // intoarce true daca este suportata combinatia functie-algoritm-task specificata Executarea taskurilor DM Se realizeaza prin cele doua metode execute, in mod: Sincron. Executie pana la final sau pana la terminarea timpului. Intoarce rezultat numai dupa finalizare. Este utilizata pentru taskuri simple cu posibilitati de finalizare in timp real public boolean run() throws JDMException { BuildTask btk=btkF.create("sablon_build_data", "sablon_build_settings", "sablon_model"); Long timeOut=null; ExecutionStatus status=dmeCon.execute(btk, timeOut); if(ExecutionState.success.equals(status.getState())) return true; else return false; } Asincron. Monitorizarea executiei se realizeaza printr-un obiect ExecutionHandle prin metoda waitForCompletion private boolean executeTask(Task taskObj, String taskName) throws JDMException { boolean isTaskSuccess = false; c.saveObject(taskName, taskObj, true); ExecutionHandle execHandle = c.execute(taskName); ExecutionStatus status = execHandle.waitForCompletion(Integer.MAX_VALUE); isTaskSuccess = status.getState().equals(ExecutionState.success); return isTaskSuccess; } Descrierea datelor pentru procesele de mining public interface PhysicalDataSet extends MiningObject ; // Contine descrierea fizica a datelor: nume atribute, tip de data void addAttribute(PhysicalAttribute attribute) throws JDMException; public interface PhysicalDataSetFactory extends Factory; PhysicalDataSet create(java.lang.String uri, boolean importMetaData) throws JDMException; // uri - numele tabelei, importMetaData - daca se importa sau nu metadatele public interface PhysicalAttribute; // specifica detaliile pentru un atribut public interface PhysicalAttributeFactory extends Factory; PhysicalAttribute create(java.lang.String attrName, AttributeDataType 
dataType, PhysicalAttributeRole role) throws JDMException; Exemplu: // Construirea modelului fizic al datelor PhysicalDataSet pds=pdsFact.create("SALARIATI", false); // Specificarea si adaugarea cheii primare PhysicalAttribute pa=paFact.create("COD_SALARIAT", AttributeDataType.integerType,PhysicalAttributeRole.caseId); pds.addAttribute(pa); // Salvarea modelului fizic c.saveObject("pdsSALARIATI", pds, true); Clusterizarea public interface ClusteringSettingsFactory extends Factory; // Creaza obiecte ClusteringSettings ClusteringSettings create() throws JDMException; public interface ClusteringSettings; // contine diferite metode pentru parametrizarea modelului de clusterizare void setAlgorithmSettings(AlgorithmSettings algorithmSettings); // asociaza un anumit algoritm procesului de clusterizare // Stabileste/intoarce numarul maxim de clusteri void setMaxNumberOfClusters(int maxClusters); int getMaxNumberOfClusters(); // Stabileste/intoarce numarul minim de instante pe cluster void setMinClusterCaseCount(long minCaseCount); long getMinClusterCaseCount() Modelul KMeans Oracle public class OraKMeansSettings extends oracle.dmt.jdm.clustering.OraClusteringAlgorithmSettings implements javax.datamining.algorithm.kmeans.KMeansSettings public void setSplitCriterion(OraSplitCriterion splitCriterion) ; public OraSplitCriterion getSplitCriterion() throws javax.datamining.JDMException ; public void setNumberOfBins(int numberOfBins) ;// implicit 10 public int getNumberOfBins() ; Exemplu parametrizari Oracle KMeans: OraKMeansSettings kmAlgo = (OraKMeansSettings)kmeansFact.create(); kmAlgo.setDistanceFunction(ClusteringDistanceFunction.euclidean); kmAlgo.setMaxNumberOfIterations(10); kmAlgo.setMinErrorTolerance(0.01); kmAlgo.setSplitCriterion(OraSplitCriterion.clusterVariance); kmAlgo.setNumberOfBins(10); kmAlgo.setBlockGrowth(2); kmAlgo.setMinPercentageAttrSupport(0.1); Rezultate clusterizare ClusteringModel model = (ClusteringModel)c.retrieveObject("mKM"+tabela, 
NamedObject.model); // Recuperarea modelului de mining Metode ClusteringModel: java.util.Collection getClusters() throws JDMException; int getNumberOfClusters(); int getNumberOfLevels(); Cluster getCluster(int identifier) throws JDMException; java.util.Collection getLeafClusters() throws JDMException; java.util.Collection getRules() throws JDMException; public interface Cluster; // Modeleaza conceptul de cluster int getClusterId(); long getCaseCount(); Cluster getParent() throws JDMException; Cluster[] getAncestors() throws JDMException; Cluster[] getChildren() throws JDMException; int getLevel(); double getSupport(); java.lang.Double getCentroidCoordinate(java.lang.String numericalAttributeName) throws JDMException; AttributeStatisticsSet getStatistics() throws JDMException; boolean isLeaf(); boolean isRoot(); Rule getRule(); Reguli public interface Rule; long getAbsoluteSupport(); // numarul de cazuri care indeplinesc regula Predicate getAntecedent(); Predicate getConsequent(); int getRuleIdentifier(); public interface SimplePredicate extends Predicate; java.lang.String getAttributeName(); ComparisonOperator getComparisonOperator(); java.lang.Double getNumericalValue(); java.lang.Object[] getCategoryValues(); boolean isNumericalValue(); Exp: "age < 20" "gender == Male"