Você está na página 1de 12

DBSCAN Algorithm Java Implementation

Posted by Indian on 6:32 PM in Web Development | DBSCAN is a density-based clustering algorithm, and its basic principle is to a given two parameters, and minp, where can be interpreted as the radius, the algorithm will search within a radius of the sample, minp is a To find the radius of the sample number n of constraints, as long as n> = minp, find the sample point is the core of the sample points, the proposed algorithm are described in References 1, below is a java implementation of this algorithm: ... DBSCAN is a density-based clustering algorithm, and its basic principle is given two parameters, and minp, where can be interpreted as the radius, the algorithm will find the sample in this radius, minp is a search radius n number of samples to restrictions, as long as n> = minp, find the sample point is the core of the sample points, the proposed algorithm are described in References 1, below is a java implementation of this algorithm: First define a Point class, the representative sample points

package com.sunzhenxing; public class Point ( private int x; private int y; private boolean isKey; private boolean isClassed; public boolean isKey () ( return isKey; ) public void setKey (boolean isKey) ( this.isKey = isKey; this.isClassed = true;

) public boolean isClassed () ( return isClassed; ) public void setClassed (boolean isClassed) ( this.isClassed = isClassed; ) public int getX () ( return x; ) public void setX (int x) ( this.x = x; ) public int getY () ( return y; ) public void setY (int y) ( this.y = y; ) public Point () ( x = 0; y = 0; )

public Point (int x, int y) ( this.x = x; this.y = y; ) public Point (String str) ( String [] p = str.split (","); this.x = Integer.parseInt (p [0]); this.y = Integer.parseInt (p [1]); ) public String print () ( return "<" + this.x +","+ this.y +">"; ) ) And then define a utility class, for the algorithm implementation services: package com.sunzhenxing; import java.io.BufferedReader; import java.io.FileReader; import java.io.IOException; import java.util .*; public class Utility ( / ** * Test the distance between the two points * @ Param p point

* @ Param q point * @ Return returns the distance between two points */ public static double getDistance (Point p, Point q) ( int dx = p.getX ()-q.getX (); int dy = p.getY ()-q.getY (); double distance = Math.sqrt (dx * dx + dy * dy); return distance; ) / ** * Check the given point is not the central point * @ Param lst The list storage point * @ Param p the point to be tested * @ Param ee radius * @ Param minp density threshold * @ Return a temporary storage point visited */ public static List isKeyPoint (List lst, Point p, int e, int minp) ( int count = 0; List tmpLst = new ArrayList (); for (Iterator it = lst.iterator (); it.hasNext ();){ Point q = it.next (); if (getDistance (p, q) <= e) ( + + Count; if (! tmpLst.contains (q)) (

tmpLst.add (q); ) ) ) if (count> = minp) ( p.setKey (true); return tmpLst; ) return null; ) public static void setListClassed (List lst) ( for (Iterator it = lst.iterator (); it.hasNext ();){ Point p = it.next (); if (! p.isClassed ()) ( p.setClassed (true); ) ) ) / ** Merge * @ Param a * @ Param b * @ Return a

*/ public static boolean mergeList (List a, List b) ( boolean merge = false; for (int index = 0; index if (a.contains (b.get (index))) ( merge = true; break; ) ) if (merge) ( for (int index = 0; index if (! a.contains (b.get (index))) ( a.add (b.get (index)); ) ) ) return merge; ) / ** * Back to the collection point in the text * @ Return back to the mid-point of the set text * @ Throws IOException */

public static List getPointsList () throws IOException ( List lst = new ArrayList (); String txtPath = "src \ \ com \ \ sunzhenxing \ \ points.txt"; BufferedReader br = new BufferedReader (new FileReader (txtPath)); String str = ""; while ((str = br.readLine ())!= null & & str !=""){ lst.add (new Point (str)); ) br.close (); return lst; ) ) Finally, in the main program to implement algorithm, as follows: package com.sunzhenxing; import java.io. *; import java.util .*; public class Dbscan ( private static List pointsList = new ArrayList ();// store the set of all points private static List > resultList = new ArrayList >();// storage DBSCAN algorithm to return the result set private static int e = 2; / / e radius private static int minp = 3; / / density threshold / ** * Extract all the points in the text and stored in the pointsList

* @ Throws IOException */ private static void display () ( int index = 1; for (Iterator > it = resultList.iterator (); it.hasNext ();){ List lst = it.next (); if (lst.isEmpty ()) ( continue; ) System.out.println ("----- s "+ index +" a cluster -----"); for (Iterator it1 = lst.iterator (); it1.hasNext ();){ Point p = it1.next (); System.out.println (p.print ()); ) index + +; ) ) / / Find all the cluster can be directly private static void applyDbscan () ( try ( pointsList = Utility.getPointsList (); for (Iterator it = pointsList.iterator (); it.hasNext ();){ Point p = it.next (); if (! p.isClassed ()) (

List tmpLst = new ArrayList (); if ((tmpLst = Utility.isKeyPoint (pointsList, p, e, minp))! = null) ( / / End point for all clustering to mark Utility.setListClassed (tmpLst); resultList.add (tmpLst); ) ) ) ) Catch (IOException e) ( / / TODO Auto-generated catch block e.printStackTrace (); ) ) / / Direct access to the clustering of all the merger, that is up to the point and find the indirect merger private static List > getResult () ( applyDbscan ();// find all the direct clustering int length = resultList.size (); for (int i = 0; i for (int j = i +1; j if (Utility.mergeList (resultList.get (i), resultList.get (j))) ( resultList.get (j). clear (); ) )

) return resultList; ) / ** * Program main function * @ Param args */ public static void main (String [] args) ( getResult (); display (); / / System.out.println (Utility.getDistance (new Point (0,0), new Point (0,2))); ) ) Below is a small test, that is used src \ \ com \ \ sunzhenxing \ \ points.txt contents of the file test, points.txt the file contents are: 0,0 0,1 0,2 0,3 0,4 0,5 12,1 12.2

12.3 12,4 12,5 12.6 0,6 0,7 12,7 0,8 0,9 1,1 The final result of the algorithm is: ----- ----- 1st cluster <0,0> <0,1> <0,2> <1,1> <0,3> <0,4> <0,5> <0,6> <0,7> <0,8> <0,9>

----- ----- 2nd cluster <12,1> <12.2> <12.3> <12,4> <12,5> <12.6> <12,7> Coordinates we can draw what conclusions the experiment to understand.

Você também pode gostar