From efc37c2d17cf6d1cb5e2d3b855ce6d18375554e5 Mon Sep 17 00:00:00 2001 From: lws49 Date: Fri, 26 Jun 2026 11:53:56 +0800 Subject: [PATCH] feat(gradebook): import header tolerance, column ordering, reassigned-identifier detection, and batched writes --- .../external_assessment_import_service.rb | 339 +++++++++-- .../preview.json.jbuilder | 27 +- ...rnal_assessment_imports_controller_spec.rb | 40 +- ...external_assessment_import_service_spec.rb | 555 ++++++++++++++++-- 4 files changed, 853 insertions(+), 108 deletions(-) diff --git a/app/services/course/gradebook/external_assessment_import_service.rb b/app/services/course/gradebook/external_assessment_import_service.rb index a4118689aa..934bd94a1c 100644 --- a/app/services/course/gradebook/external_assessment_import_service.rb +++ b/app/services/course/gradebook/external_assessment_import_service.rb @@ -11,6 +11,8 @@ def initialize(payload) end end + HEADER_SUGGESTION_MAX_DISTANCE = 2 + def initialize(course:, actor:, components:, identifier_mode:, csv_data:) @course = course @actor = actor @@ -27,8 +29,11 @@ def preview unresolved: resolution[:unresolved], malformed: resolution[:malformed], sample: sample(resolution[:resolved]), - conflicts: conflicts(resolution[:resolved]), - out_of_range: out_of_range(resolution[:resolved]) + conflict_rows: conflict_rows(resolution[:resolved]), + out_of_range: out_of_range(resolution[:resolved]), + reassignments: reassignments(resolution[:resolved]), + total_rows: resolution[:resolved].size, + column_order: column_order(rows) } end @@ -66,6 +71,8 @@ def parsed_rows guard_no_duplicate_components! table = CSV.parse(@csv_data.to_s, headers: true) guard_header!(table.headers) + guard_not_empty!(table) + guard_unique_identifiers!(table) table end @@ -78,9 +85,110 @@ def guard_no_duplicate_components! def guard_header!(headers) expected = [identifier_header] + @components.map { |c| c[:name] } - return if headers == expected + identifier_not_first = headers.include?(identifier_header) && + headers.compact.first != identifier_header + return if headers.tally == expected.tally && !identifier_not_first + + suggestions = header_suggestions(expected, headers) + raise ImportError, { message: 'bad_header', + missing: missing_headers(expected, headers, suggestions), + unrecognized: unrecognized_headers(expected, headers, suggestions), + suggestions: suggestions, + duplicates: duplicate_headers(headers), + identifierNotFirst: identifier_not_first } + end + + def guard_not_empty!(table) + return unless table.empty? + + raise ImportError, { message: 'empty_csv' } + end + + def guard_unique_identifiers!(table) + keys = table.filter_map do |row| + key = lookup_key(row[identifier_header].to_s.strip) + key unless key.empty? + end + duplicates = keys.tally.select { |_key, count| count > 1 }.keys + return if duplicates.empty? + + raise ImportError, { message: 'duplicate_identifier', identifiers: duplicates } + end + + # Headers that appear more than once in the uploaded file, with their counts. + def duplicate_headers(headers) + headers.compact.tally.select { |_name, count| count > 1 }. + map { |name, count| { name: name, count: count } } + end - raise ImportError, { message: 'bad_header', expected: expected, got: headers } + # Expected columns absent from the upload. A column we can pair to a likely + # typo'd upload is reported via suggestions ("did you mean"), not here. + def missing_headers(expected, headers, suggestions) + (expected - headers) - suggestions.map { |s| s[:expected] } + end + + # Uploaded columns that are not expected. The upload side of a typo pair is + # reported via suggestions, so it is excluded here. + def unrecognized_headers(expected, headers, suggestions) + (headers.compact.uniq - expected) - suggestions.map { |s| s[:didYouMean] } + end + + # For each expected header absent from the uploaded file, suggest the closest + # *unused* uploaded header within HEADER_SUGGESTION_MAX_DISTANCE edits (catches + # typos and singular/plural, e.g. required "Midterms" vs uploaded "Midterm"). + def header_suggestions(expected, got) + available = (got - expected).compact + (expected - got).filter_map do |want| + near = available.min_by { |have| levenshtein(want, have) } + next if near.nil? || levenshtein(want, near) > HEADER_SUGGESTION_MAX_DISTANCE + + available.delete(near) # never suggest the same uploaded column twice + { expected: want, didYouMean: near } + end + end + + def levenshtein(source, target) + return target.length if source.empty? + return source.length if target.empty? + + distances = (0..target.length).to_a + + source.each_char.with_index(1) do |source_char, source_index| + distances = next_levenshtein_row( + distances, + target, + source_char, + source_index + ) + end + + distances[target.length] + end + + def next_levenshtein_row(previous, target, source_char, source_index) + current = [source_index] + + target.each_char.with_index(1) do |target_char, target_index| + current << levenshtein_cell( + previous, + current, + source_char, + target_char, + target_index + ) + end + + current + end + + def levenshtein_cell(previous, current, source_char, target_char, target_index) + substitution_cost = (source_char == target_char) ? 0 : 1 + + [ + previous[target_index] + 1, + current[target_index - 1] + 1, + previous[target_index - 1] + substitution_cost + ].min end def identifier_header @@ -90,7 +198,7 @@ def identifier_header def roster_lookup @roster_lookup ||= if @identifier_mode == 'email' - @course.course_users.includes(:user).index_by { |cu| cu.user.email.downcase } + @course.course_users.includes(user: :emails).index_by { |cu| cu.user.email.downcase } else @course.course_users.where.not(external_id: [nil, '']).index_by(&:external_id) end @@ -113,11 +221,17 @@ def resolve(table) end grades, bad = parse_grades(row, idx) malformed.concat(bad) - resolved << { course_user: course_user, identifier: identifier, grades: grades } + resolved << { course_user: course_user, identifier: normalized_email(identifier, course_user), grades: grades } end { resolved: resolved, unresolved: unresolved.uniq, malformed: malformed } end + def normalized_email(identifier, course_user) + return course_user.user.email if @identifier_mode == 'email' && course_user&.user&.email.present? + + identifier + end + def parse_grades(row, row_idx) grades = {} malformed = [] @@ -151,58 +265,127 @@ def out_of_range(resolved) max = component[:maximum_grade] next unless grade < 0 || grade > max + cells << { identifier: row[:identifier], component: component[:name], grade: grade, max: max, - kind: grade < 0 ? 'below' : 'above' + kind: (grade < 0) ? 'below' : 'above' } end end cells end + # A row is "reassigned" when its CSV identifier was previously imported as the + # binding key for a grade now owned by a DIFFERENT course_user — i.e. the + # identifier has changed hands (e.g. an External ID recycled to a new student). + # Resolution still binds by course_user_id, so the grade is placed on whoever + # currently owns the identifier; this advisory asks the user to confirm that is + # the intended person. Unlike a conflict row it can fire on a brand-new insert, + # so it is surfaced standalone, not via the conflict table. One entry per + # identifier. Naturally silent on benign same-student drift and on mode switches + # (an email never equals a stored External ID). + def reassignments(resolved) + owners = snapshot_owners + return [] if owners.empty? + + raw = collect_reassignments(resolved, owners) + names = course_user_names(raw.flat_map { |entry| entry[:previous_ids] }) + raw.map do |entry| + { identifier: entry[:identifier], currentStudent: entry[:current_student], + previousStudents: entry[:previous_ids].filter_map { |id| names[id] } } + end + end + + # One reassignment entry per identifier whose grade is now owned by a different + # course_user than the resolved row. Dedups by identifier, marking it seen only + # once a genuine reassignment is found so the first such row wins. + def collect_reassignments(resolved, owners) + seen = Set.new + resolved.filter_map do |row| + incoming = row[:identifier] + others = owners.fetch(incoming, []).reject { |cu_id| cu_id == row[:course_user].id } + next if others.empty? || !seen.add?(incoming) + + { identifier: incoming, current_student: row[:course_user].name, previous_ids: others } + end + end + + # { imported_identifier => Set[course_user_id] } across all of the course's + # external grades. Two different course_users can share one snapshot value + # (a stale grade plus a fresh one) — that overlap is exactly the reassignment. + def snapshot_owners + @snapshot_owners ||= + Course::ExternalAssessmentGrade. + joins(:external_assessment). + where(course_external_assessments: { course_id: @course.id }). + where.not(imported_identifier: [nil, '']). + pluck(:imported_identifier, :course_user_id). + each_with_object({}) { |(ident, cu_id), acc| (acc[ident] ||= Set.new) << cu_id } + end + + def course_user_names(ids) + return {} if ids.empty? + + CourseUser.where(id: ids.uniq).pluck(:id, :name).to_h + end + def sample(resolved) resolved.first(5).map do |r| - { studentName: r[:course_user].name, grades: r[:grades] } + { identifier: r[:identifier], grades: r[:grades] } end end - def conflicts(resolved) - result = [] - @components.each do |component| - conflicts_for_component(component, resolved, result) + # Component column headers in the order they appear in the uploaded CSV, + # with the identifier header removed. guard_header! has already ensured the + # header set matches @components, so the remainder are exactly the component + # names in CSV order. + def column_order(table) + table.headers.compact - [identifier_header] + end + + def conflict_rows(resolved) + grades_by_component = @components.to_h do |component| + external = existing_external(component[:name]) + grades = external ? external.external_assessment_grades.index_by(&:course_user_id) : {} + [component[:name], grades] end - result + + resolved.filter_map { |row| build_conflict_row(row, grades_by_component) } end - def conflicts_for_component(component, resolved, result) - external = existing_external(component[:name]) - return unless external + def build_conflict_row(row, grades_by_component) + cells = {} + changed_any = false - grade_by_cu = external.external_assessment_grades.index_by(&:course_user_id) - resolved.each do |row| - conflict = conflict_entry(component[:name], row, grade_by_cu) - result << conflict if conflict + @components.each do |component| + cell, changed = conflict_cell(component, row, grades_by_component) + changed_any ||= changed + cells[component[:name]] = cell end + + return nil unless changed_any + + { identifier: row[:identifier], studentName: row[:course_user].name, cells: cells } end - def conflict_entry(component_name, row, grade_by_cu) - in_file = row[:grades][component_name] - return nil if in_file.nil? + def conflict_cell(component, row, grades_by_component) + name = component[:name] + in_file = row[:grades][name] + existing_record = grades_by_component[name][row[:course_user].id] + existing = existing_record&.grade + changed = grade_changed?(existing, in_file) + [{ existing: existing&.to_f, inFile: in_file, changed: changed }, changed] + end - existing = grade_by_cu[row[:course_user].id] - return nil if existing.nil? || existing.grade.nil? + def grades_comparable?(existing, in_file) + !existing.nil? && !in_file.nil? + end - { - component: component_name, - studentName: row[:course_user].name, - existingGrade: existing.grade.to_f, - inFileGrade: in_file, - identifierMismatch: existing.imported_identifier.present? && - existing.imported_identifier != row[:identifier] - } + def grade_changed?(existing, in_file) + grades_comparable?(existing, in_file) && existing.to_f.round(2) != in_file.to_f.round(2) end def existing_external(name) @@ -225,32 +408,84 @@ def create_component(component, resolved) end def upsert_grades(external, component, resolved, on_conflict:) - grade_by_cu = external.external_assessment_grades.index_by(&:course_user_id) - written = 0 - resolved.each do |row| - written += upsert_one(grade_by_cu, component, row, on_conflict) - end - written + inserts, upserts = partition_grade_writes( + external, component[:name], resolved, on_conflict + ) + + bulk_insert(inserts) + bulk_upsert(upserts) + + inserts.size + upserts.size end - def upsert_one(grade_by_cu, component, row, on_conflict) - in_file = row[:grades][component[:name]] - return 0 if in_file.nil? + def partition_grade_writes(external, component_name, resolved, on_conflict) + context = { + external: external, + component_name: component_name, + existing_by_cu: external.external_assessment_grades.index_by(&:course_user_id) + } - existing = grade_by_cu[row[:course_user].id] - if existing.nil? - bulk_insert([build_grade(existing_external(component[:name]), row, value: in_file)]) - 1 - elsif on_conflict.to_s == 'replace' || existing.grade.nil? - User.with_stamper(@actor) do - existing.update!(grade: in_file, imported_identifier: row[:identifier]) - end - 1 + if on_conflict.to_s == 'replace' + collect_replace_grade_writes(resolved, context) else - 0 + collect_keep_grade_writes(resolved, context) end end + def collect_replace_grade_writes(resolved, context) + resolved.each_with_object({ inserts: [], upserts: [] }) do |row, buckets| + existing, record = grade_write_for(row, context) + next if record.nil? + + if existing.nil? + buckets[:inserts] << record + else + buckets[:upserts] << record + end + end.values_at(:inserts, :upserts) + end + + def collect_keep_grade_writes(resolved, context) + resolved.each_with_object({ inserts: [], upserts: [] }) do |row, buckets| + existing, record = grade_write_for(row, context) + next if record.nil? + + if existing.nil? + buckets[:inserts] << record + elsif existing.grade.nil? + buckets[:upserts] << record + end + end.values_at(:inserts, :upserts) + end + + def grade_write_for(row, context) + in_file = row[:grades][context[:component_name]] + return if in_file.nil? + + existing = context[:existing_by_cu][row[:course_user].id] + record = build_grade(context[:external], row, value: in_file) + + [existing, record] + end + + # Bulk update via Postgres ON CONFLICT. Only the listed columns are written on + # conflict, so creator_id/created_at on the existing row are preserved; grade, + # imported_identifier, updater_id and updated_at are refreshed. validate:false + # because the rows already exist (the uniqueness validation would reject them) + # and grades were already coerced to Float during parsing. + def bulk_upsert(records) + return if records.empty? + + Course::ExternalAssessmentGrade.import( + records, + validate: false, + on_duplicate_key_update: { + conflict_target: [:external_assessment_id, :course_user_id], + columns: [:grade, :imported_identifier, :updater_id, :updated_at] + } + ) + end + def build_grade(external, resolved_row, value: nil) Course::ExternalAssessmentGrade.new( external_assessment: external, diff --git a/app/views/course/external_assessment_imports/preview.json.jbuilder b/app/views/course/external_assessment_imports/preview.json.jbuilder index 92c7b19c04..0ac5f0234c 100644 --- a/app/views/course/external_assessment_imports/preview.json.jbuilder +++ b/app/views/course/external_assessment_imports/preview.json.jbuilder @@ -3,14 +3,25 @@ json.ok @result[:ok] json.unresolved @result[:unresolved] json.malformed @result[:malformed] json.sample @result[:sample] do |row| - json.studentName row[:studentName] + json.identifier row[:identifier] json.grades row[:grades] end -json.conflicts @result[:conflicts] do |c| - json.component c[:component] - json.studentName c[:studentName] - json.existingGrade c[:existingGrade] - json.inFileGrade c[:inFileGrade] - json.identifierMismatch c[:identifierMismatch] +json.conflictRows @result[:conflict_rows] do |row| + json.identifier row[:identifier] + json.studentName row[:studentName] + json.cells row[:cells] +end +json.outOfRange @result[:out_of_range] do |cell| + json.identifier cell[:identifier] + json.component cell[:component] + json.grade cell[:grade] + json.max cell[:max] + json.kind cell[:kind] +end +json.reassignments @result[:reassignments] do |entry| + json.identifier entry[:identifier] + json.currentStudent entry[:currentStudent] + json.previousStudents entry[:previousStudents] end -json.outOfRange @result[:out_of_range] +json.totalRows @result[:total_rows] +json.columnOrder @result[:column_order] diff --git a/spec/controllers/course/external_assessment_imports_controller_spec.rb b/spec/controllers/course/external_assessment_imports_controller_spec.rb index 9c1fc113d2..b8ba93ea81 100644 --- a/spec/controllers/course/external_assessment_imports_controller_spec.rb +++ b/spec/controllers/course/external_assessment_imports_controller_spec.rb @@ -28,7 +28,7 @@ not_to(change { Course::ExternalAssessmentGrade.count }) data = JSON.parse(response.body) expect(data['ok']).to be(true) - expect(data['sample'].first['studentName']).to eq(alice.name) + expect(data['sample'].first['identifier']).to eq('A001') end it 'returns ok:false with unresolved identifiers' do @@ -41,6 +41,7 @@ it 'returns 422 on a malformed header' do post :preview, params: base_params.merge(csvData: "Wrong,Midterm\nA001,1\n") expect(response).to have_http_status(:unprocessable_entity) + expect(JSON.parse(response.body)['errors']['message']).to eq('bad_header') end it 'returns conflicts when a grade already exists' do @@ -54,8 +55,8 @@ post :preview, params: base_params.merge(csvData: "External ID,Midterm\nA001,20\n") data = JSON.parse(response.body) - expect(data['conflicts'].size).to eq(1) - expect(data['conflicts'].first['component']).to eq('Midterm') + expect(data['conflictRows'].size).to eq(1) + expect(data['conflictRows'].first['studentName']).to eq(alice.name) end it 'returns ok:false with malformed grade cells' do @@ -72,6 +73,7 @@ components: dup_components, csvData: "External ID,Midterm,Midterm\nA001,1,2\n" ) + expect(JSON.parse(response.body)['errors']['message']).to eq('duplicate_component_name') expect(response).to have_http_status(:unprocessable_entity) end @@ -92,7 +94,7 @@ ) data = JSON.parse(response.body) expect(data['ok']).to be(true) - expect(data['sample'].first['studentName']).to eq(alice.name) + expect(data['sample'].first['identifier']).to eq(alice.user.email) end end @@ -138,6 +140,36 @@ data = JSON.parse(response.body) expect(data['updatedComponents']).to eq(1) expect(data['createdComponents']).to eq(0) + kept = Course::ExternalAssessment.for_course(course).find_by(title: 'Midterm'). + external_assessment_grades.find_by(course_user: alice) + expect(kept.grade).to eq(41) + end + + it 'overwrites an existing grade when onConflict is replace' do + post :create, params: base_params.merge(onConflict: 'replace') + post :create, params: base_params.merge(onConflict: 'replace', + csvData: "External ID,Midterm\nA001,99\n") + data = JSON.parse(response.body) + expect(data['updatedComponents']).to eq(1) + expect(data['createdComponents']).to eq(0) + grade = Course::ExternalAssessmentGrade. + joins(:external_assessment). + find_by(course_externalassessments: { course_id: course.id }, + course_user_id: alice.id) + expect(grade.grade).to eq(99) + end + + it 'returns 422 on duplicate component names' do + dup_components = [{ name: 'Midterm', weightage: 30, maximumGrade: 50 }, + { name: 'Midterm', weightage: 20, maximumGrade: 40 }] + expect do + post :create, params: base_params.merge( + components: dup_components, + csvData: "External ID,Midterm,Midterm\nA001,1,2\n", + onConflict: 'replace' + ) + end.not_to(change { Course::ExternalAssessmentGrade.count }) + expect(response).to have_http_status(:unprocessable_entity) end it 'returns 422 and writes nothing on malformed grade cells' do diff --git a/spec/services/course/gradebook/external_assessment_import_service_spec.rb b/spec/services/course/gradebook/external_assessment_import_service_spec.rb index 82b3c7700e..af88d159ec 100644 --- a/spec/services/course/gradebook/external_assessment_import_service_spec.rb +++ b/spec/services/course/gradebook/external_assessment_import_service_spec.rb @@ -21,26 +21,36 @@ def service(csv_data:, components:, identifier_mode: 'student_id') describe '#preview' do it 'writes nothing (dry-run)' do - csv = "External ID,Midterm\nA001,41\nA002,37\n" + csv = "External ID,Midterm\nA001,41\nA002,37\n" expect { service(csv_data: csv, components: components).preview }. - not_to(change { Course::ExternalAssessmentGrade.count }) + to not_change { Course::ExternalAssessmentGrade.count }. + and not_change { Course::ExternalAssessment.count } end - it 'returns ok with the first 5 resolved rows (student names)' do + it 'returns ok with the first 5 resolved rows (External IDs)' do csv = "External ID,Midterm\nA001,41\nA002,37\n" result = service(csv_data: csv, components: components).preview expect(result[:ok]).to be(true) expect(result[:unresolved]).to be_empty expect(result[:sample].size).to eq(2) - expect(result[:sample].map { |r| r[:studentName] }).to include(alice.name, bob.name) + expect(result[:sample].map { |r| r[:identifier] }).to include(alice.external_id, bob.external_id) expect(result[:sample].first[:grades]['Midterm']).to eq(41.0) end - it 'resolves by email when in email mode' do - csv = "Email,Midterm\n#{alice.user.email},41\n" + it 'caps the sample at 5 rows but reports the true total in total_rows' do + extra = (1..5).map { |i| create(:course_student, course: course, external_id: "X00#{i}") } + ids = ['A001', 'A002'] + extra.map(&:external_id) + csv = "External ID,Midterm\n" + ids.map { |id| "#{id},10" }.join("\n") + "\n" + result = service(csv_data: csv, components: components).preview + expect(result[:sample].size).to eq(5) + expect(result[:total_rows]).to eq(7) + end + + it 'normalizes preview identifiers to the roster email when resolving by email' do + csv = "Email,Midterm\n#{alice.user.email.upcase},41\n" result = service(csv_data: csv, components: components, identifier_mode: 'email').preview expect(result[:ok]).to be(true) - expect(result[:sample].first[:studentName]).to eq(alice.name) + expect(result[:sample].first[:identifier]).to eq(alice.user.email) end it 'fails the whole batch on any unresolved identifier' do @@ -71,25 +81,61 @@ def service(csv_data:, components:, identifier_mode: 'student_id') to raise_error(described_class::ImportError) end - it 'returns ok with empty sample when CSV has no data rows' do + it 'raises duplicate_component_name (not bad_header) for an in-file duplicate component' do + dup = [{ name: 'Midterm', weightage: 30, maximum_grade: 50 }, + { name: 'Midterm', weightage: 20, maximum_grade: 40 }] + csv = "External ID,Midterm,Midterm\nA001,1,2\n" + expect { service(csv_data: csv, components: dup).preview }. + to raise_error(described_class::ImportError) do |error| + expect(error.payload[:message]).to eq('duplicate_component_name') + end + end + + it 'rejects an otherwise-valid CSV with no data rows as empty_csv' do csv = "External ID,Midterm\n" - result = service(csv_data: csv, components: components).preview - expect(result[:ok]).to be(true) - expect(result[:sample]).to be_empty - expect(result[:conflicts]).to be_empty + expect { service(csv_data: csv, components: components).preview }. + to raise_error(described_class::ImportError) do |error| + expect(error.payload[:message]).to eq('empty_csv') + end end - it 'resolves by email case-insensitively' do - csv = "Email,Midterm\n#{alice.user.email.upcase},41\n" - result = service(csv_data: csv, components: components, identifier_mode: 'email').preview + it 'writes nothing and raises empty_csv on commit of a header-only CSV' do + csv = "External ID,Midterm\n" + expect do + expect { service(csv_data: csv, components: components).commit(on_conflict: 'replace') }. + to raise_error(described_class::ImportError) + end.not_to(change { Course::ExternalAssessmentGrade.count }) + end + + it 'treats a whitespace-only cell as ungraded, not malformed' do + csv = "External ID,Midterm\nA001, \n" + result = service(csv_data: csv, components: components).preview expect(result[:ok]).to be(true) - expect(result[:sample].first[:studentName]).to eq(alice.name) + expect(result[:malformed]).to be_empty + expect(result[:sample].first[:grades]['Midterm']).to be_nil end - it 'deduplicates unresolved identifiers' do + it 'rejects duplicate identifiers even if unresolvable' do csv = "External ID,Midterm\nZZZZ,1\nZZZZ,2\n" + expect { service(csv_data: csv, components: components).preview }. + to raise_error(described_class::ImportError) do |error| + expect(error.payload[:message]).to eq('duplicate_identifier') + expect(error.payload[:identifiers]).to include('ZZZZ') + end + end + + it 'reports the malformed cell with its 1-based data-row number and component' do + csv = "External ID,Midterm\nA001,41\nA002,oops\n" result = service(csv_data: csv, components: components).preview - expect(result[:unresolved].count('ZZZZ')).to eq(1) + expect(result[:malformed]).to include('row 3, Midterm: oops') + end + + it 'accumulates every malformed cell across rows and components' do + comps = [{ name: 'Midterm', weightage: 30, maximum_grade: 50 }, + { name: 'Final', weightage: 50, maximum_grade: 100 }] + csv = "External ID,Midterm,Final\nA001,bad,worse\n" + result = service(csv_data: csv, components: comps).preview + expect(result[:malformed].size).to eq(2) end it 'treats a blank cell as ungraded in the sample' do @@ -102,20 +148,69 @@ def service(csv_data:, components:, identifier_mode: 'student_id') csv = "External ID,Midterm\nA001,41\n" result = service(csv_data: csv, components: components).preview expect(result[:ok]).to be(true) - expect(result[:sample].first[:studentName]).to eq(alice.name) + expect(result[:sample].first[:identifier]).to eq(alice.external_id) end it 'accepts the Email header in email mode' do csv = "Email,Midterm\n#{alice.user.email},41\n" result = service(csv_data: csv, components: components, identifier_mode: 'email').preview expect(result[:ok]).to be(true) - expect(result[:sample].first[:studentName]).to eq(alice.name) + expect(result[:sample].first[:identifier]).to eq(alice.user.email) end - it 'rejects the legacy Identifier header' do - csv = "Identifier,Midterm\nA001,41\n" + it 'rejects the External ID header when resolving by email' do + csv = "External ID,Midterm\n#{alice.user.email},41\n" + expect { service(csv_data: csv, components: components, identifier_mode: 'email').preview }. + to raise_error(described_class::ImportError) do |error| + expect(error.payload[:message]).to eq('bad_header') + end + end + + it 'reports duplicate headers in the bad_header payload' do + csv = "External ID,Midterm,Midterm\nA001,1,2\n" expect { service(csv_data: csv, components: components).preview }. - to raise_error(described_class::ImportError) + to raise_error(described_class::ImportError) do |error| + expect(error.payload[:message]).to eq('bad_header') + expect(error.payload[:duplicates]).to include(name: 'Midterm', count: 2) + end + end + + it 'leaves duplicates empty for a non-duplicate header mismatch' do + csv = "Wrong,Midterm\nA001,41\n" + expect { service(csv_data: csv, components: components).preview }. + to raise_error(described_class::ImportError) do |error| + expect(error.payload[:duplicates]).to eq([]) + end + end + + it 'reports only duplicates when every expected column is present but repeated' do + csv = "External ID,Midterm,Midterm\nA001,1,2\n" + expect { service(csv_data: csv, components: components).preview }. + to raise_error(described_class::ImportError) do |error| + expect(error.payload[:duplicates]).to include(name: 'Midterm', count: 2) + expect(error.payload[:missing]).to eq([]) + expect(error.payload[:unrecognized]).to eq([]) + end + end + + it 'reports missing and unrecognized columns for a header mismatch' do + csv = "External ID,Wrong\nA001,41\n" + expect { service(csv_data: csv, components: components).preview }. + to raise_error(described_class::ImportError) do |error| + expect(error.payload[:missing]).to eq(['Midterm']) + expect(error.payload[:unrecognized]).to eq(['Wrong']) + end + end + + it 'returns the component columns in the CSV header order, not the defined order' do + two_components = [ + { name: 'Midterm', weightage: 30, maximum_grade: 50 }, + { name: 'Final', weightage: 70, maximum_grade: 100 } + ] + # CSV lists Final before Midterm;. + csv = "External ID,Final,Midterm\n80,A001,41\n" + result = service(csv_data: csv, components: two_components).preview + expect(result[:column_order]).to eq(['Final', 'Midterm']) end end @@ -127,14 +222,43 @@ def service(csv_data:, components:, identifier_mode: 'student_id') csv = "External ID,Midterms\nS123,105\n" result = service(csv_data: csv, components: oor_components).preview expect(result[:ok]).to be(true) # out-of-range is advisory, not a block - expect(result[:out_of_range]).to include(a_string_matching(/Midterms: 105.* exceeds 100/)) + expect(result[:out_of_range]).to include( + a_hash_including( + component: "Midterms", + identifier: "S123", + grade: 105.0, + kind: "above", + max: 100 + ) + ) end it 'flags grades below 0' do csv = "External ID,Midterms\nS123,-2\n" result = service(csv_data: csv, components: oor_components).preview expect(result[:ok]).to be(true) - expect(result[:out_of_range]).to include(a_string_matching(/Midterms: -2.* below 0/)) + expect(result[:out_of_range]).to include( + a_hash_including( + component: "Midterms", + identifier: "S123", + grade: -2.0, + kind: "below", + max: 100 + ) + ) + end + + it 'does not flag a grade exactly at the maximum or at zero' do + csv = "External ID,Midterms\nS123,100\nA001,0\n" + result = service(csv_data: csv, components: oor_components).preview + expect(result[:ok]).to be(true) + expect(result[:out_of_range]).to be_empty + end + + it 'ignores blank cells for out-of-range detection' do + csv = "External ID,Midterms\nS123,\n" + result = service(csv_data: csv, components: oor_components).preview + expect(result[:out_of_range]).to be_empty end end @@ -197,6 +321,49 @@ def service(csv_data:, components:, identifier_mode: 'student_id') end.to raise_error(described_class::ImportError) end.not_to(change { Course::ExternalAssessmentGrade.count }) end + + it 'raises validation_failed with the unresolved identifiers in the payload' do + csv = "External ID,Midterm\nA001,41\nZZZ,9\n" + expect { service(csv_data: csv, components: components).commit(on_conflict: 'replace') }. + to raise_error(described_class::ImportError) do |error| + expect(error.payload[:message]).to eq('validation_failed') + expect(error.payload[:unresolved]).to include('ZZZ') + end + end + + it 'aborts the commit and writes nothing when a cell is malformed' do + csv = "External ID,Midterm\nA001,41\nA002,oops\n" + expect do + expect do + service(csv_data: csv, components: components).commit(on_conflict: 'replace') + end.to raise_error(described_class::ImportError) do |error| + expect(error.payload[:malformed]).to be_present + end + end.not_to(change { Course::ExternalAssessmentGrade.count }) + end + + it 'rolls back all components when a later component fails mid-write' do + # Pre-create "Final" outside the service WITHOUT a gradebook_contribution so + # the service treats it as new (existing_external matches by title) — instead, + # create a title collision the service cannot reconcile. + comps = [{ name: 'Midterm', weightage: 30, maximum_grade: 50 }, + { name: 'Quiz', weightage: 20, maximum_grade: 20 }] + csv = "External ID,Midterm,Quiz\nA001,40,10\n" + # Sabotage: make the second create! blow up by stubbing it to raise after the first wrote. + call = 0 + allow(Course::ExternalAssessment).to receive(:create_for_course!).and_wrap_original do |orig, **kwargs| + call += 1 + raise ActiveRecord::RecordInvalid if call == 2 + + orig.call(**kwargs) + end + expect do + expect do + service(csv_data: csv, components: comps).commit(on_conflict: 'replace') + end.to raise_error(ActiveRecord::RecordInvalid) + end.to change { Course::ExternalAssessment.for_course(course).count }.by(0). + and(change { Course::ExternalAssessmentGrade.count }.by(0)) + end end describe '#commit (upsert into existing component)' do @@ -246,24 +413,68 @@ def seed_initial! expect(external.gradebook_contribution.reload.weight).to eq(30) end - it 'lists conflicts only for existing non-blank grade rows' do - seed_initial! + it 'groups changed cells by student and drops unchanged / new-fill students' do + seed_initial! # alice (A001) Midterm=10; bob (A002) has no grade yet csv = "External ID,Midterm\nA001,20\nA002,33\n" result = service(csv_data: csv, components: components).preview - expect(result[:conflicts].map { |c| c[:studentName] }).to contain_exactly(alice.name) - conflict = result[:conflicts].first - expect(conflict[:existingGrade]).to eq(10.0) - expect(conflict[:inFileGrade]).to eq(20.0) + + rows = result[:conflict_rows] + expect(rows.map { |r| r[:studentName] }).to contain_exactly(alice.name) + cell = rows.first[:cells]['Midterm'] + expect(cell[:existing]).to eq(10.0) + expect(cell[:inFile]).to eq(20.0) + expect(cell[:changed]).to be(true) end - it 'flags identifierMismatch when a row resolves to a student imported under a different id' do + it 'drops a student whose grade is unchanged (equal at 2 dp)' do + seed_initial! # alice Midterm=10 + csv = "External ID,Midterm\nA001,10.00\n" + result = service(csv_data: csv, components: components).preview + expect(result[:conflict_rows]).to be_empty + end + + it 'flags a reassignment when an identifier now resolves to a different student than it was imported under' do + seed_initial! # alice imported under 'A001' (snapshot 'A001' on alice's grade) + alice.update!(external_id: 'AOLD') # free up A001 + carol = create(:course_student, course: course, external_id: 'A001') # A001 recycled to carol + csv = "External ID,Midterm\nA001,77\n" + result = service(csv_data: csv, components: components).preview + + entry = result[:reassignments].find { |r| r[:identifier] == 'A001' } + expect(entry[:currentStudent]).to eq(carol.name) + expect(entry[:previousStudents]).to include(alice.name) + # carol is a brand-new insert, so she is NOT in the conflict table + expect(result[:conflict_rows].map { |r| r[:studentName] }).not_to include(carol.name) + end + + it 'does not flag a reassignment when only the same student\'s own identifier changed' do seed_initial! # alice imported under 'A001' - bob.update!(external_id: 'A777') - alice.update!(external_id: 'A002') # alice now owns A002 (formerly bob's) - csv = "External ID,Midterm\nA002,20\n" # A002 now -> alice, but her grade was imported as 'A001' + bob.update!(external_id: 'A777') # free A002 + alice.update!(external_id: 'A002') # alice's OWN id drifted A001 -> A002 + csv = "External ID,Midterm\nA002,20\n" result = service(csv_data: csv, components: components).preview - mismatch = result[:conflicts].find { |c| c[:studentName] == alice.name } - expect(mismatch[:identifierMismatch]).to be(true) + expect(result[:reassignments]).to be_empty + end + + it 'does not flag a reassignment when switching identifier mode between imports' do + seed_initial! # alice imported under External ID 'A001' + csv = "Email,Midterm\n#{alice.user.email},20\n" + result = service(csv_data: csv, components: components, identifier_mode: 'email').preview + expect(result[:reassignments]).to be_empty + end + + it 'reports changed cells across multiple components on one student row' do + comps = [{ name: 'Midterm', weightage: 30, maximum_grade: 50 }, + { name: 'Final', weightage: 50, maximum_grade: 100 }] + service(csv_data: "External ID,Midterm,Final\nA001,10,80\n", components: comps). + commit(on_conflict: 'replace') + csv = "External ID,Midterm,Final\nA001,20,90\n" + result = service(csv_data: csv, components: comps).preview + + expect(result[:conflict_rows].length).to eq(1) + cells = result[:conflict_rows].first[:cells] + expect(cells['Midterm'][:changed]).to be(true) + expect(cells['Final'][:changed]).to be(true) end it 'returns updatedComponents: 1 after an upsert' do @@ -283,16 +494,38 @@ def seed_initial! expect(external.external_assessment_grades.find_by(course_user: alice).grade).to eq(50) end - it 'detects conflicts across multiple components' do + it 'refreshes imported_identifier on upsert while preserving the original creator' do + external = seed_initial! # alice imported under 'A001', value 10 + grade = external.external_assessment_grades.find_by(course_user: alice) + original_creator_id = grade.creator_id + alice.update!(external_id: 'A001-NEW') + csv = "External ID,Midterm\nA001-NEW,22\n" + service(csv_data: csv, components: components).commit(on_conflict: 'replace') + grade.reload + expect(grade.grade).to eq(22) + expect(grade.imported_identifier).to eq('A001-NEW') + expect(grade.creator_id).to eq(original_creator_id) + end + + it 'inserts new students and upserts existing ones in a single replace commit' do + external = seed_initial! # alice has Midterm=10, bob has none + csv = "External ID,Midterm\nA001,15\nA002,20\n" + summary = service(csv_data: csv, components: components).commit(on_conflict: 'replace') + expect(external.external_assessment_grades.find_by(course_user: alice).grade).to eq(15) + expect(external.external_assessment_grades.find_by(course_user: bob).grade).to eq(20) + expect(summary[:gradesWritten]).to eq(2) + end + + it 'includes the unchanged cell with its real existing value when another cell changed' do comps = [{ name: 'Midterm', weightage: 30, maximum_grade: 50 }, { name: 'Final', weightage: 50, maximum_grade: 100 }] - # Seed both components - seed_csv = "External ID,Midterm,Final\nA001,10,80\n" - service(csv_data: seed_csv, components: comps).commit(on_conflict: 'replace') - # Re-import with different values - csv = "External ID,Midterm,Final\nA001,20,90\n" + service(csv_data: "External ID,Midterm,Final\nA001,10,80\n", components: comps). + commit(on_conflict: 'replace') + csv = "External ID,Midterm,Final\nA001,20,80\n" # only Midterm changes result = service(csv_data: csv, components: comps).preview - expect(result[:conflicts].map { |c| c[:component] }).to contain_exactly('Midterm', 'Final') + cells = result[:conflict_rows].first[:cells] + expect(cells['Midterm']).to include(existing: 10.0, inFile: 20.0, changed: true) + expect(cells['Final']).to include(existing: 80.0, inFile: 80.0, changed: false) end end @@ -308,5 +541,239 @@ def seed_initial! expect(grade.grade).to eq(41) end end + + describe 'order-free assessment columns' do + let(:components) do + [{ name: 'Midterm', weightage: 30, maximum_grade: 50 }, + { name: 'Finals', weightage: 70, maximum_grade: 100 }] + end + + it 'accepts assessment columns in any order, with the identifier first' do + csv = "External ID,Finals,Midterm\nA001,1,2\n" + result = service(csv_data: csv, components: components).preview + expect(result[:ok]).to be(true) + expect(result[:sample].first[:grades]['Midterm']).to eq(2.0) + expect(result[:sample].first[:grades]['Finals']).to eq(1.0) + + csv = "External ID,Midterm,Finals\nA001,1,2\n" + result = service(csv_data: csv, components: components).preview + expect(result[:ok]).to be(true) + expect(result[:sample].first[:grades]['Midterm']).to eq(1.0) + expect(result[:sample].first[:grades]['Finals']).to eq(2.0) + end + + it 'rejects an unexpected extra column' do + csv = "External ID,Midterm,Finals,Notes\nA001,41,61,x\n" + expect { service(csv_data: csv, components: components).preview }. + to raise_error(described_class::ImportError) { |e| + expect(e.payload[:message]).to eq('bad_header') + } + end + + it 'rejects an unexpected extra column even when the number of headers are correct' do + csv = "External ID,Finals,Notes\nA001,1,x\n" + expect { service(csv_data: csv, components: components).preview }. + to raise_error(described_class::ImportError) { |e| + expect(e.payload[:message]).to eq('bad_header') + } + end + + it 'rejects a duplicated column' do + csv = "External ID,Midterm,Midterm,Finals\nA001,41,41,61\n" + expect { service(csv_data: csv, components: components).preview }. + to raise_error(described_class::ImportError) + end + + it 'rejects a duplicated column even when the number of headers are correct' do + csv = "External ID,Midterm,Midterm\nA001,41,41\n" + expect { service(csv_data: csv, components: components).preview }. + to raise_error(described_class::ImportError) + end + + it 'rejects a file missing the identifier column' do + csv = "Midterm\n41\n" + expect { service(csv_data: csv, components: components).preview }. + to raise_error(described_class::ImportError) do |error| + expect(error.payload[:message]).to eq('bad_header') + expect(error.payload[:missing]).to include('External ID') + end + end + end + + describe 'duplicate identifiers' do + it 'rejects a CSV with the same identifier in two rows' do + csv = "External ID,Midterm\nA001,40\nA001,50\n" + expect { service(csv_data: csv, components: components).preview }. + to raise_error(described_class::ImportError) do |error| + expect(error.payload[:message]).to eq('duplicate_identifier') + expect(error.payload[:identifiers]).to include('A001') + end + end + + it 'treats case-different emails as the same duplicated identifier' do + csv = "Email,Midterm\n#{alice.user.email},40\n#{alice.user.email.upcase},50\n" + expect { service(csv_data: csv, components: components, identifier_mode: 'email').preview }. + to raise_error(described_class::ImportError) do |error| + expect(error.payload[:message]).to eq('duplicate_identifier') + end + end + + it 'writes nothing when an identifier is duplicated' do + csv = "External ID,Midterm\nA001,40\nA001,50\n" + expect do + expect { service(csv_data: csv, components: components).commit(on_conflict: 'replace') }. + to raise_error(described_class::ImportError) + end.not_to(change { Course::ExternalAssessmentGrade.count }) + end + end + + describe 'bad-header suggestions' do + let(:components) { [name: 'Midterms', weightage: 30, maximum_grade: 50] } + + it 'suggests the near-miss uploaded header for a missing expected header' do + csv = "External ID,Midterm\nA001,41\n" # header "Midterm" vs component "Midterms" + error = nil + begin + service(csv_data: csv, components: components).preview + rescue described_class::ImportError => e + error = e + end + expect(error).to be_present + expect(error.payload[:message]).to eq('bad_header') + expect(error.payload[:suggestions]).to include( + a_hash_including(expected: 'Midterms', didYouMean: 'Midterm') + ) + # The typo pair is surfaced as a suggestion, not double-reported as a + # plain missing/unrecognized column. + expect(error.payload[:missing]).to eq([]) + expect(error.payload[:unrecognized]).to eq([]) + end + + it 'omits a suggestion when no uploaded header is within the edit-distance threshold' do + csv = "External ID,Homework\nA001,41\n" # "Homework" is far from "Midterms" + error = nil + begin + service(csv_data: csv, components: components).preview + rescue described_class::ImportError => e + error = e + end + expect(error.payload[:suggestions]).to be_empty + end + end + + describe 'commit write batching' do + let(:roster) { [alice, bob] + create_list(:course_student, 8, course: course) } + + def write_query_count + count = 0 + table = Course::ExternalAssessmentGrade.table_name + + counter = lambda do |*, payload| + sql = payload[:sql].to_s + + count += 1 if sql =~ /\A\s*(INSERT|UPDATE)/i && + sql.include?(table) + end + + ActiveSupport::Notifications.subscribed(counter, 'sql.active_record') { yield } + + count + end + + def csv_for(students, value) + rows = students.map { |s| "#{s.external_id},#{value}" }.join("\n") + "External ID,Midterm\n#{rows}\n" + end + + it 'inserts many brand-new grades with a single INSERT statement' do + students = roster + students.each { |s| s.update!(external_id: "E#{s.id}") } + csv = csv_for(students, 41) + + writes = write_query_count do + service(csv_data: csv, components: components).commit(on_conflict: 'replace') + end + + expect(Course::ExternalAssessmentGrade.where(course_user: students).count).to eq(students.size) + expect(writes).to be <= 2 # one INSERT for the grades (+ at most the component create) + end + + it 'replaces many existing grades without one UPDATE per row' do + students = roster + students.each { |s| s.update!(external_id: "E#{s.id}") } + # First import seeds existing grades. + service(csv_data: csv_for(students, 41), components: components).commit(on_conflict: 'replace') + + writes = write_query_count do + service(csv_data: csv_for(students, 99), components: components).commit(on_conflict: 'replace') + end + + grades = Course::ExternalAssessmentGrade.where(course_user: students).where.not(grade: nil).pluck(:grade) + expect(grades).to all(eq(99)) + expect(writes).to be <= 1 # single upsert statement for all rows + end + + it 'keeps existing non-null grades but fills nil ones under keep' do + students = roster + students.each { |s| s.update!(external_id: "E#{s.id}") } + service(csv_data: csv_for(students, 41), components: components).commit(on_conflict: 'replace') + external = Course::ExternalAssessment.for_course(course).find_by!(title: 'Midterm') + blanked = students.first + external.external_assessment_grades.find_by(course_user: blanked).update_column(:grade, nil) + + service(csv_data: csv_for(students, 99), components: components).commit(on_conflict: 'keep') + grades = external.external_assessment_grades.where.not(course_user: blanked).pluck(:grade) + expect(grades).to all(eq(41)) + expect(external.external_assessment_grades.find_by(course_user: blanked).grade).to eq(99) + end + end + + describe 'header ordering' do + let(:components) do + [{ name: 'Midterm', weightage: 30, maximum_grade: 50 }, + { name: 'Final', weightage: 70, maximum_grade: 100 }] + end + + it 'returns column_order following the uploaded CSV header order (faithful preview)' do + ordered = "External ID,Midterm,Final\nA001,41,80\n" + shuffled = "External ID,Final,Midterm\nA001,80,41\n" + a = service(csv_data: ordered, components: components).preview[:column_order] + b = service(csv_data: shuffled, components: components).preview[:column_order] + expect(a).to eq(%w[Midterm Final]) + expect(b).to eq(%w[Final Midterm]) + end + + it 'keeps canonical (position) order in define-step order, not CSV header order' do + # CSV columns are in the opposite order to the defined components; the + # gradebook's canonical order must follow the defined (append) order. + csv = "External ID,Final,Midterm\nA001,80,41\n" + service(csv_data: csv, components: components).commit(on_conflict: 'replace') + titles = Course::ExternalAssessment.for_course(course).order(:position).pluck(:title) + expect(titles).to eq(%w[Midterm Final]) + end + + it 'blocks when the identifier is present but not the first column' do + csv = "Midterm,External ID,Final\n41,A001,80\n" + error = (service(csv_data: csv, components: components).preview rescue $ERROR_INFO) # rubocop:disable Style/RescueModifier + expect(error).to be_a(described_class::ImportError) + expect(error.payload[:message]).to eq('bad_header') + expect(error.payload[:identifierNotFirst]).to be(true) + end + + it 'does not flag identifierNotFirst when the identifier is first' do + csv = "External ID,Final,Midterm\nA001,80,41\n" + expect { service(csv_data: csv, components: components).preview }.not_to raise_error + end + + it 'treats a missing identifier as missing, not identifierNotFirst' do + csv = "Midterm,Final\n41,80\n" + begin + service(csv_data: csv, components: components).preview + rescue described_class::ImportError => e + expect(e.payload[:identifierNotFirst]).to be(false) + expect(e.payload[:missing]).to include('External ID') + end + end + end end end