How to resolve the algorithm Top rank per group step by step in the Scala programming language
Published on 12 May 2024 09:40 PM
How to resolve the algorithm Top rank per group step by step in the Scala programming language
Table of Contents
Problem Statement
Find the top N salaries in each department, where N is provided as a parameter. Use the employee data (employee name, employee ID, salary, department) as a formatted internal data structure, adapting it to your language's native idioms rather than parsing it at runtime, or identify your external data source. The data set itself appears in the source code below.
Let's start with the solution:
Step-by-step solution: how to solve the Top rank per group task in the Scala programming language
Source code in the scala programming language
import scala.io.Source
import scala.language.implicitConversions
import scala.language.reflectiveCalls
import scala.collection.immutable.TreeMap
/** Prints the `topN` best-paid employees of every department, together with
  * head counts and average salaries, from an embedded semicolon-separated table.
  *
  * The object extends `App`, so all members below are initialised only once
  * `main` runs.
  */
object TopRank extends App {

  /** How many top earners are listed per department. */
  val topN = 3

  /** Employee table: a header line followed by `name;id;salary;department` rows. */
  val rawData = """Employee Name;Employee ID;Salary;Department
                  |Tyler Bennett;E10297;32000;D101
                  |John Rappl;E21437;47000;D050
                  |George Woltman;E00127;53500;D101
                  |Adam Smith;E63535;18000;D202
                  |Claire Buckman;E39876;27800;D202
                  |David McClellan;E04242;41500;D101
                  |Rich Holcomb;E01234;49500;D202
                  |Nathan Adams;E41298;21900;D050
                  |Richard Potter;E43128;15900;D101
                  |David Motsinger;E27002;19250;D202
                  |Tim Sampair;E03033;27000;D101
                  |Kim Arlich;E10001;57000;D190
                  |Timothy Grove;E16398;29900;D190""".stripMargin

  /** One employee record.
    *
    * @param name       employee name (only used for display)
    * @param id         employee id (only used for display)
    * @param salary     salary, used for ranking and averaging
    * @param department department id, used as the grouping key
    */
  class Employee(name: String, id: String, val salary: Int, val department: String) {
    /** Tab-separated `id`, `salary` and `name`, as printed in the report. */
    override def toString: String = s"$id\t$salary\t$name"
  }

  /** Employees grouped by department id; the `TreeMap` keeps the keys sorted. */
  val data: TreeMap[String, Seq[Employee]] =
    TreeMap(
      Source.fromString(rawData).getLines().toSeq // runtime parsing of the embedded table
        .drop(1)                                  // skip the header line
        .map(_.split(";"))
        .map(fields => new Employee(fields(0), fields(1), fields(2).toInt, fields(3)))
        .groupBy(_.department)
        .toSeq: _*)

  /** Adds `avg` to any `Iterable` of numeric values.
    *
    * A named wrapper class is used instead of an anonymous structural type so
    * that calling `avg` needs no runtime reflection.
    */
  final class AverageOps[T](values: Iterable[T])(implicit num: Numeric[T]) {
    /** Arithmetic mean as a `Double`; for an empty collection this is `0.0 / 0`, i.e. `NaN`. */
    def avg: Double = num.toDouble(values.sum) / values.size
  }

  /** Implicit view that makes `avg` available on numeric collections. */
  implicit def iterableWithAvg[T: Numeric](data: Iterable[T]): AverageOps[T] =
    new AverageOps(data)

  /** Average salary over all employees of all departments. */
  val a: Double = data.values.flatMap(_.map(_.salary)).avg

  println(s"Reporting top $topN salaries in each department.\n")
  println(s"Total of ${data.values.map(_.size).sum} employees in ${data.size} departments")
  println(f"Average salary: $a%8.2f\n")

  // One section per department: a summary line, then the topN earners by descending salary.
  data.foreach { case (dep, emps) =>
    val summary = f"Department: $dep pop: ${emps.size} avg: ${emps.map(_.salary).avg}%8.2f\n"
    val topEarners = emps.sortBy(-_.salary).take(topN).mkString("\t", "\n\t", "")
    println(summary + topEarners)
  }
}
import scala.slick.driver.H2Driver.simple._
import scala.slick.lifted.ProvenShape
/** Slick table definition for `EMP`, holding one row per employee.
  *
  * A row is the tuple `(employee id, employee name, department id, salary)`,
  * in the order fixed by the `*` projection.
  */
class Emp(tag: Tag)
    extends Table[(String, String, String, Double)](tag, "EMP") {

  /** `EMP_ID` column, declared as the primary key. */
  def id: Column[String] = column[String]("EMP_ID", O.PrimaryKey)

  /** `EMP_NAME` column, declared NOT NULL. */
  def name: Column[String] = column[String]("EMP_NAME", O.NotNull)

  /** `DEPT_ID` column, declared NOT NULL. */
  def deptId: Column[String] = column[String]("DEPT_ID", O.NotNull)

  /** `SALARY` column, declared NOT NULL. */
  def salary: Column[Double] = column[Double]("SALARY", O.NotNull)

  /** Default projection; its tuple type must equal the table's type parameter. */
  def * : ProvenShape[(String, String, String, Double)] =
    (id, name, deptId, salary)
}
// The main application.
// Creates the EMP table in an in-memory H2 database, inserts a fixed set of
// employees and prints a per-department "top N salaries" report. The whole
// report, including its heading and separator lines, is produced by one SQL
// statement that returns a single text column.
object TopNrankSLICK extends App {
// Number of distinct salary ranks reported per department; interpolated into
// the SQL below as $topN.
val topN = 3
// The query interface for the Emp table
val employees = TableQuery[Emp]
// Create a connection (called a "session") to an in-memory H2 database
Database.forURL("jdbc:h2:mem:hello", driver = "org.h2.Driver").withSession {
implicit session =>
// Create the schema
employees.ddl.create
// Fill the database
// Rows are (EMP_ID, EMP_NAME, DEPT_ID, SALARY), matching Emp's * projection.
// The last three D190 rows share the salary 29900, so the report contains
// tied ranks (presumably added to exercise the 'T' tie marker in the SQL).
// NOTE(review): employeesInsertResult is not read anywhere in this block.
val employeesInsertResult: Option[Int] = employees ++= Seq(
("E10297", "Tyler Bennett", "D101", 32000),
("E21437", "John Rappl", "D050", 47000),
("E00127", "George Woltman", "D101", 53500),
("E63535", "Adam Smith", "D202", 18000),
("E39876", "Claire Buckman", "D202", 27800),
("E04242", "David McClellan", "D101", 41500),
("E01234", "Rich Holcomb", "D202", 49500),
("E41298", "Nathan Adams", "D050", 21900),
("E43128", "Richard Potter", "D101", 15900),
("E27002", "David Motsinger", "D202", 19250),
("E03033", "Tim Sampair", "D101", 27000),
("E10001", "Kim Arlich", "D190", 57000),
("E16398", "Timothy Grove", "D190", 29900),
("E16399", "Timothy Grave", "D190", 29900),
("E16400", "Timothy Grive", "D190", 29900))
/* Manual SQL / String Interpolation */
// Required import for the sql interpolator
import scala.slick.jdbc.StaticQuery.interpolation
// Construct a SQL statement manually with an interpolated value
//
// Structure of the statement:
//  * The inner query is a UNION ALL of six selects, each tagged with a LINE code:
//      10  - one overall row: avg(SALARY), count(*) as POPULATION, and the
//            number of distinct departments carried in the TIE_COUNT column
//      30  - one row per department, rendered as '-'
//      50  - one row per department with its average salary and population
//      70  - one row per department, rendered as the column titles
//      90  - one row per department, rendered as a ruler of '-' and '+'
//      110 - one row per employee whose RANK (count of distinct salaries in the
//            same department that are >= the employee's salary) is <= $topN;
//            TIE_COUNT is the number of employees in that department with the
//            same salary
//  * The outer CASE turns each row into one formatted text line depending on
//    LINE; employee rows get a 'T' prefix on the rank when TIE_COUNT is not 1.
//  * The final ORDER BY sorts by DEPT_ID, then LINE, then SALARY descending,
//    then EMP_ID; the LINE 10 row uses ' ' as DEPT_ID (presumably so that it
//    sorts ahead of the real department ids - confirm against H2 collation).
//  * .as[String] reads every result row as a single String.
val plainQuery = // First the bun - formatting SELECT clause
sql"""select case LINE
when 10 then
'Tot.' || LPAD(POPULATION, 2) || ' Employees in ' || TIE_COUNT ||
' deps.Avg salary:' || TO_CHAR(SALARY, '99990.99')
when 30 then
'-'
when 50 then
'Department: ' || DEPT_ID || ', pop: ' || POPULATION ||
'. Avg Salary: ' || TO_CHAR(SALARY, '99990.99')
when 70 then
LPAD('Employee ID', 14) || LPAD('Employee name', 20) ||
LPAD('Salary', 9) || 'Rank'
when 90 then
LPAD('+', 14, '-') || LPAD('+', 20, '-') || LPAD('+', 9, '-') ||
LPAD('+', 4, '-')
else
LPAD(' ', 8) || LPAD(EMP_ID, 6) || LPAD(EMP_NAME, 20) ||
TO_CHAR(SALARY, '99990.99') || LPAD(case when TIE_COUNT = 1 then ' ' else 'T' end || RANK, 4)
end "Top rank per group"
from (select 10 LINE
,null EMP_ID
,null EMP_NAME
,' ' DEPT_ID
,avg(SALARY) SALARY
,0 RANK
,count(distinct DEPT_ID) TIE_COUNT
,count(*) POPULATION
from EMP
union all
select 30 LINE
,null EMP_ID
,null EMP_NAME
,DEPT_ID
,0 SALARY
,0 RANK
,0 TIE_COUNT
,0 POPULATION
from EMP
group by DEPT_ID
union all
select 50 LINE
,null EMP_ID
,null EMP_NAME
,DEPT_ID
,avg(SALARY) SALARY
,0 RANK
,0 TIE_COUNT
,count(*) POPULATION
from EMP
group by DEPT_ID
union all
select 70 LINE
,null EMP_ID
,null EMP_NAME
,DEPT_ID
,0 SALARY
,0 RANK
,0 TIE_COUNT
,0 POPULATION
from EMP
group by DEPT_ID
union all
select 90 LINE
,null EMP_ID
,null EMP_NAME
,DEPT_ID
,0 SALARY
,0 RANK
,0 TIE_COUNT
,0 POPULATION
from EMP
group by DEPT_ID
union all
select 110 LINE
,EMP_ID
,EMP_NAME
,DEPT_ID
,SALARY
,(select count(distinct EMP4.SALARY)
from EMP EMP4
where EMP4.DEPT_ID = EMP3.DEPT_ID
and EMP4.SALARY >= EMP3.SALARY) RANK
,(select count(*)
from EMP EMP2
where EMP2.DEPT_ID = EMP3.DEPT_ID
and EMP2.SALARY = EMP3.SALARY) TIE_COUNT
,0 POPULATION
from EMP EMP3
where $topN >= -- Here is the meat, Correlated subquery
(select count(distinct EMP4.SALARY)
from EMP EMP4
where EMP4.DEPT_ID = EMP3.DEPT_ID
and EMP4.SALARY >= EMP3.SALARY))
order by DEPT_ID ,LINE ,SALARY desc, EMP_ID""".as[String]
// Execute the query
// Each returned String is one already-formatted report line.
plainQuery.foreach(println(_))
} // session
} // TopNrankSLICK
You may also check: How to resolve the algorithm Factorions step by step in the Swift programming language
You may also check: How to resolve the algorithm Terminal control/Ringing the terminal bell step by step in the Scala programming language
You may also check: How to resolve the algorithm Combinations step by step in the MATLAB programming language
You may also check: How to resolve the algorithm 100 doors step by step in the Aikido programming language
You may also check: How to resolve the algorithm Concurrent computing step by step in the FutureBasic programming language