How to resolve the algorithm Top rank per group step by step in the Scala programming language

Problem Statement
Step by Step Solution
Sourcecode

Problem Statement

Find the top N salaries in each department, where N is provided as a parameter. Use this data as a formatted internal data structure (adapt it to your language-native idioms, rather than parse at runtime), or identify your external data source:

Let's start with the solution:

Step by Step solution about How to resolve the algorithm Top rank per group step by step in the Scala programming language

Source code in the scala programming language

import scala.io.Source
import scala.language.implicitConversions
import scala.language.reflectiveCalls
import scala.collection.immutable.TreeMap

object TopRank extends App {
  val topN = 3

  val rawData = """Employee Name;Employee ID;Salary;Department
               |Tyler Bennett;E10297;32000;D101
               |John Rappl;E21437;47000;D050
               |George Woltman;E00127;53500;D101
               |Adam Smith;E63535;18000;D202
               |Claire Buckman;E39876;27800;D202
               |David McClellan;E04242;41500;D101
               |Rich Holcomb;E01234;49500;D202
               |Nathan Adams;E41298;21900;D050
               |Richard Potter;E43128;15900;D101
               |David Motsinger;E27002;19250;D202
               |Tim Sampair;E03033;27000;D101
               |Kim Arlich;E10001;57000;D190
               |Timothy Grove;E16398;29900;D190""".stripMargin

  class Employee(name: String, id: String,
                 val salary: Int,
                 val department: String) {
    override def toString = s"$id\t$salary\t$name"
  }

  // A TreeMap has sorted keys
  val data: TreeMap[String, Seq[TopRank.Employee]] = // TreeMap is a sorted map
    TreeMap((Source.fromString(rawData) getLines ()).toSeq // Runtime parsing
      .drop(1) // Drop header
      .map(_.split(";")) //read fields into list of employees
      .map(emp => new Employee(emp(0), emp(1), emp(2).toInt, emp(3)))
      .groupBy(_.department).toSeq: _*)

  implicit def iterableWithAvg[T: Numeric](data: Iterable[T]) = new {
    def average[T](ts: Iterable[T])(implicit num: Numeric[T]) = {
      num.toDouble(ts.sum) / ts.size
    }
    def avg = average(data)
  }

  val a = data.flatMap { case (_, emps) => emps.map(_.salary) }.avg

  println(s"Reporting top $topN salaries in each department.\n")

  println(s"Total of ${data.foldLeft(0)(_ + _._2.size)} employees in ${data.size} departments")

  println(f"Average salary: $a%8.2f\n")

  data.foreach {
    case (dep, emps) => println(f"Department: $dep  pop: ${emps.size} avg: ${emps.map(_.salary).avg}%8.2f\n"
      + emps.sortBy(-_.salary).take(topN)
      .map(_.toString).mkString("\t", "\n\t", ""))
  }
}


import scala.slick.driver.H2Driver.simple._
import scala.slick.lifted.ProvenShape

// A Employees table with 4 columns: Employee ID, Employee Name, Department, Salary 
class Emp(tag: Tag) extends Table[(String, String, String, Double)](tag, "EMP") {
  def id: Column[String] = column[String]("EMP_ID", O.PrimaryKey) // This is the primary key column
  def name: Column[String] = column[String]("EMP_NAME", O.NotNull)
  def deptId: Column[String] = column[String]("DEPT_ID", O.NotNull)
  def salary: Column[Double] = column[Double]("SALARY", O.NotNull)

  // Every table needs a * projection with the same type as the table's type parameter
  def * : ProvenShape[(String, String, String, Double)] = (id, name, deptId, salary)
}

// The main application
object TopNrankSLICK extends App {

  val topN = 3

  // The query interface for the Emp table
  val employees = TableQuery[Emp]

  // Create a connection (called a "session") to an in-memory H2 database
  Database.forURL("jdbc:h2:mem:hello", driver = "org.h2.Driver").withSession {
    implicit session =>

      // Create the schema
      employees.ddl.create

      // Fill the database
      val employeesInsertResult: Option[Int] = employees ++= Seq(
        ("E10297", "Tyler Bennett", "D101", 32000),
        ("E21437", "John Rappl", "D050", 47000),
        ("E00127", "George Woltman", "D101", 53500),
        ("E63535", "Adam Smith", "D202", 18000),
        ("E39876", "Claire Buckman", "D202", 27800),
        ("E04242", "David McClellan", "D101", 41500),
        ("E01234", "Rich Holcomb", "D202", 49500),
        ("E41298", "Nathan Adams", "D050", 21900),
        ("E43128", "Richard Potter", "D101", 15900),
        ("E27002", "David Motsinger", "D202", 19250),
        ("E03033", "Tim Sampair", "D101", 27000),
        ("E10001", "Kim Arlich", "D190", 57000),
        ("E16398", "Timothy Grove", "D190", 29900),
        ("E16399", "Timothy Grave", "D190", 29900),
        ("E16400", "Timothy Grive", "D190", 29900))

      /* Manual SQL / String Interpolation */
      // Required import for the sql interpolator
      import scala.slick.jdbc.StaticQuery.interpolation

      // Construct a SQL statement manually with an interpolated value
      val plainQuery = // First the bun - formatting SELECT clause
        sql"""select case LINE
         when 10 then
          'Tot.' || LPAD(POPULATION, 2) || ' Employees in ' || TIE_COUNT ||
          ' deps.Avg salary:' || TO_CHAR(SALARY, '99990.99')
         when 30 then
          '-'
         when 50 then
          'Department: ' || DEPT_ID || ', pop: ' || POPULATION ||
          '. Avg Salary: ' || TO_CHAR(SALARY, '99990.99')
         when 70 then
          LPAD('Employee ID', 14) || LPAD('Employee name', 20) ||
          LPAD('Salary', 9) || 'Rank'
         when 90 then
          LPAD('+', 14, '-') || LPAD('+', 20, '-') || LPAD('+', 9, '-') ||
          LPAD('+', 4, '-')
         else
          LPAD(' ', 8) || LPAD(EMP_ID, 6) || LPAD(EMP_NAME, 20) ||
          TO_CHAR(SALARY, '99990.99') || LPAD(case when TIE_COUNT = 1 then ' ' else 'T' end || RANK, 4)
       end "Top rank per group"
  from (select 10 LINE
              ,null EMP_ID
              ,null EMP_NAME
              ,' ' DEPT_ID
              ,avg(SALARY) SALARY
              ,0 RANK
              ,count(distinct DEPT_ID) TIE_COUNT
              ,count(*) POPULATION
          from EMP
        union all
        select 30      LINE
              ,null    EMP_ID
              ,null    EMP_NAME
              ,DEPT_ID
              ,0       SALARY
              ,0       RANK
              ,0       TIE_COUNT
              ,0       POPULATION
          from EMP
         group by DEPT_ID
        union all
        select 50 LINE
              ,null EMP_ID
              ,null EMP_NAME
              ,DEPT_ID
              ,avg(SALARY) SALARY
              ,0 RANK
              ,0 TIE_COUNT
              ,count(*) POPULATION
          from EMP
         group by DEPT_ID
        union all
        select 70      LINE
              ,null    EMP_ID
              ,null    EMP_NAME
              ,DEPT_ID
              ,0       SALARY
              ,0       RANK
              ,0       TIE_COUNT
              ,0       POPULATION
          from EMP
         group by DEPT_ID
        union all
        select 90      LINE
              ,null    EMP_ID
              ,null    EMP_NAME
              ,DEPT_ID
              ,0       SALARY
              ,0       RANK
              ,0       TIE_COUNT
              ,0       POPULATION
          from EMP
         group by DEPT_ID
        union all
        select 110 LINE
              ,EMP_ID
              ,EMP_NAME
              ,DEPT_ID
              ,SALARY
              ,(select count(distinct EMP4.SALARY)
                  from EMP EMP4
                 where EMP4.DEPT_ID = EMP3.DEPT_ID
                   and EMP4.SALARY >= EMP3.SALARY) RANK
              ,(select count(*)
                  from EMP EMP2
                 where EMP2.DEPT_ID = EMP3.DEPT_ID
                   and EMP2.SALARY = EMP3.SALARY) TIE_COUNT
              ,0 POPULATION
          from EMP EMP3
         where $topN >= -- Here is the meat, Correlated subquery
               (select count(distinct EMP4.SALARY)
                  from EMP EMP4
                 where EMP4.DEPT_ID = EMP3.DEPT_ID
                   and EMP4.SALARY >= EMP3.SALARY))
 order by DEPT_ID ,LINE ,SALARY desc, EMP_ID""".as[String]

      // Execute the query
      plainQuery.foreach(println(_))
  } // session
} // TopNrankSLICK

You may also check:How to resolve the algorithm Factorions step by step in the Swift programming language
You may also check:How to resolve the algorithm Terminal control/Ringing the terminal bell step by step in the Scala programming language
You may also check:How to resolve the algorithm Combinations step by step in the MATLAB programming language
You may also check:How to resolve the algorithm 100 doors step by step in the Aikido programming language
You may also check:How to resolve the algorithm Concurrent computing step by step in the FutureBasic programming language

How to resolve the algorithm Top rank per group step by step in the Scala programming language

Table of Contents

Problem Statement

Step by Step solution about How to resolve the algorithm Top rank per group step by step in the Scala programming language

Source code in the scala programming language